From 70ba186961dc9a6d023e52b20826f1b0ca6d7ac4 Mon Sep 17 00:00:00 2001 From: Emeline Favreau <9661216+EmelineFavreau@users.noreply.github.com> Date: Mon, 3 Jun 2024 11:00:18 +0100 Subject: [PATCH 001/113] typo --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 84aca20a..5558ee64 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -547,4 +547,4 @@ plugins { } ``` -This should go in your Nextflow confgiguration file, specified with `-c ` when running the pipeline. +This should go in your Nextflow configuration file, specified with `-c ` when running the pipeline. From a7ab56a0abb32ab719003609ab25ae044164bab3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:40:21 +0200 Subject: [PATCH 002/113] bump-version 2.2.0dev --- CHANGELOG.md | 11 +++++++++++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6854689c..8ec96518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 2.2.0 - Dogmatix [XXXX-XX-XX] + +### `Added` + + +### `Changed` + + +### `Fixed` + + ## 2.1.0 - Obelix [2024-05-29] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 02488444..b8bb11a1 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/ custom_logo_title: "nf-core/raredisease" report_comment: > - This report has been generated by the nf-core/raredisease + This report has been generated by the nf-core/raredisease analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-raredisease-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 928d3a8c..1a1b0a2c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -304,7 +304,7 @@ manifest { description = """call and score variants from WGS/WES of rare disease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.1.0' + version = '2.2.0dev' doi = '' } From 6aeecee6de5c31e653b9c0597a72b13e8ddfa1f5 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:58:24 +0200 Subject: [PATCH 003/113] lint --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ec96518..fce9b9d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - ### `Changed` - ### `Fixed` - ## 2.1.0 - Obelix [2024-05-29] ### `Added` From efb39f6845bc6f5217417e0d1017c6623af539d2 Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Wed, 5 Jun 2024 18:11:41 +0200 Subject: [PATCH 004/113] adds missing citations for bwameme --- CHANGELOG.md | 2 ++ CITATIONS.md | 4 ++++ docs/output.md | 7 ++++++- docs/usage.md | 9 +++++---- .../local/utils_nfcore_raredisease_pipeline/main.nf | 2 ++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fce9b9d6..67fdb977 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) + ## 2.1.0 - Obelix [2024-05-29] ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 36b3cd7b..1db771ac 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -22,6 +22,10 @@ > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 
In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE; 2019:314-324. doi:10.1109/IPDPS.2019.00041 +- [BWA-MEME](https://academic.oup.com/bioinformatics/article/38/9/2404/6543607) + + > Jung Y, Han D. BWA-MEME: BWA-MEM emulated with a machine learning approach. Bioinformatics. 2022;38(9):2404-2413. doi:10.1093/bioinformatics/btac137 + - [CADD1](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9), [2](https://academic.oup.com/nar/article/47/D1/D886/5146191) > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9 diff --git a/docs/output.md b/docs/output.md index 3241f5d4..376acce9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Mapping](#mapping) - [Bwa-mem2](#bwa-mem2) - [BWA](#bwa) + - [BWA-MEME](#bwa-meme) - [Sentieon bwa mem](#sentieon-bwa-mem) - [Duplicate marking](#duplicate-marking) - [Picard's MarkDuplicates](#picards-markduplicates) @@ -88,6 +89,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [BWA](https://github.com/lh3/bwa) used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to bwa. +##### BWA-MEME + +[BWA-MEME](https://github.com/kaist-ina/BWA-MEME) used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to bwameme. 
+ ##### Sentieon bwa mem [Sentieon's bwa mem](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#map-reads-to-reference) is the software accelerated version of the bwa-mem algorithm. It is used to efficiently perform the alignment using BWA. Aligned reads are then coordinate sorted using Sentieon's [sort](https://support.sentieon.com/manual/usages/general/#util-syntax) utility. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to "sentieon". @@ -96,7 +101,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ##### Picard's MarkDuplicates -[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true. +[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 bwameme and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
Output files from Alignment diff --git a/docs/usage.md b/docs/usage.md index 5558ee64..996d0e6a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -139,7 +139,7 @@ Note that the pipeline is modular in architecture. It offers you the flexibility nf-core/raredisease consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories: -1. Alignment (bwamem2/bwa/Sentieon BWA mem) +1. Alignment (bwamem2/bwa/bwameme/Sentieon BWA mem) 2. QC stats from the alignment files 3. Repeat expansions (ExpansionsHunter & Stranger) 4. Variant calling - SNV (DeepVariant/Sentieon DNAscope) @@ -162,14 +162,15 @@ The mandatory and optional parameters for each category are tabulated below. | aligner1 | fasta_fai4 | | fasta2 | bwamem24 | | platform | bwa4 | -| mito_name/mt_fasta3 | known_dbsnp5 | +| mito_name/mt_fasta3 | bwameme4 | +| | known_dbsnp5 | | | known_dbsnp_tbi5 | | | min_trimmed_length6 | -1Default value is bwamem2. Other alternatives are bwa and sentieon (requires valid Sentieon license ).
+1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
3If mito_name is provided, mt_fasta can be generated by the pipeline.
-4fasta_fai, bwa and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+4fasta_fai, bwa, bwamem2 and bwameme, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.
diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index 36c0cbaa..54ee0a08 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -217,6 +217,7 @@ def toolCitationText() { align_text = [ params.aligner.equals("bwa") ? "BWA (Li, 2013)," :"", params.aligner.equals("bwamem2") ? "BWA-MEM2 (Vasimuddin et al., 2019)," : "", + params.aligner.equals("bwameme") ? "BWA-MEME (Jung et al., 2022)," : "", params.aligner.equals("sentieon") ? "Sentieon DNASeq (Kendig et al., 2019)," : "", params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : "" ] @@ -325,6 +326,7 @@ def toolBibliographyText() { align_text = [ params.aligner.equals("bwa") ? "
  • Li, H. (2013). Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM (arXiv:1303.3997). arXiv. http://arxiv.org/abs/1303.3997
  • " :"", params.aligner.equals("bwamem2") ? "
  • Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 314–324. https://doi.org/10.1109/IPDPS.2019.00041
  • " : "", + params.aligner.equals("bwameme") ? "
  • Jung Y, Han D. BWA-MEME: BWA-MEM emulated with a machine learning approach. Bioinformatics. 2022;38(9):2404-2413. doi:10.1093/bioinformatics/btac137
  • " : "", params.aligner.equals("sentieon") ? "
  • Kendig, K. I., Baheti, S., Bockol, M. A., Drucker, T. M., Hart, S. N., Heldenbrand, J. R., Hernaez, M., Hudson, M. E., Kalmbach, M. T., Klee, E. W., Mattson, N. R., Ross, C. A., Taschuk, M., Wieben, E. D., Wiepert, M., Wildman, D. E., & Mainzer, L. S. (2019). Sentieon DNASeq Variant Calling Workflow Demonstrates Strong Computational Performance and Accuracy. Frontiers in Genetics, 10, 736. https://doi.org/10.3389/fgene.2019.00736
  • " : "", params.aligner.equals("sentieon") ? "
  • Freed, D., Aldana, R., Weber, J. A., & Edwards, J. S. (2017). The Sentieon Genomics Tools—A fast and accurate solution to variant calling from next-generation sequence data (p. 115717). bioRxiv. https://doi.org/10.1101/115717
  • " : "" ] From c568ef79a39f1e79f0dd92c571c41f5100457e37 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:27:12 +0200 Subject: [PATCH 005/113] change type and update container --- modules/local/get_chrom_sizes.nf | 4 ++-- modules/local/rename_align_files.nf | 4 ++-- nextflow_schema.json | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/get_chrom_sizes.nf b/modules/local/get_chrom_sizes.nf index 4ab80ed1..e84dbe20 100644 --- a/modules/local/get_chrom_sizes.nf +++ b/modules/local/get_chrom_sizes.nf @@ -4,8 +4,8 @@ process GET_CHROM_SIZES { conda "conda-forge::coreutils=8.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : - 'biocontainers/gnu-wget:1.18--0' }" + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" input: tuple val(meta), path(fai) diff --git a/modules/local/rename_align_files.nf b/modules/local/rename_align_files.nf index da9f890b..40278ca5 100644 --- a/modules/local/rename_align_files.nf +++ b/modules/local/rename_align_files.nf @@ -4,8 +4,8 @@ process RENAME_ALIGN_FILES { conda "conda-forge::coreutils=8.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : - 'biocontainers/gnu-wget:1.18--0' }" + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' : + 'biocontainers/gnu-wget:1.18--h36e9172_9' }" input: tuple val(meta), path(input) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6e2fbc81..10e7f354 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -106,8 +106,8 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A file containing the path to models produced by GATK4 GermlineCNVCaller cohort.", - "format": "file-path", + "description": "A directory containing the models produced by GATK4 GermlineCNVCaller cohort.", + "format": "directory-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." }, "genome": { From bf98157390d9307111242ad9d294f00591c347eb Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:45:40 +0200 Subject: [PATCH 006/113] revert a change --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 974b16fd..f9db3a8f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,8 +113,8 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A directory containing the models produced by GATK4 GermlineCNVCaller cohort.", - "format": "directory-path", + "description": "A file containing the path to the models produced by GATK4 GermlineCNVCaller cohort.", + "format": "file-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." 
}, "genome": { From 16c2be05363955b49a81f7c08eee048eb6a509f0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:46:32 +0200 Subject: [PATCH 007/113] fix typo --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f9db3a8f..80cdfd11 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,7 +113,7 @@ "type": "string", "exists": true, "fa_icon": "fas fa-file", - "description": "A file containing the path to the models produced by GATK4 GermlineCNVCaller cohort.", + "description": "A file containing the path to models produced by GATK4 GermlineCNVCaller cohort.", "format": "file-path", "help_text": "This model is required for generating a cnv calls when using GermlineCNVCaller." }, From 71c81b3e8622918b63fd4ed9c12699b2d65f3d3b Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:47:01 +0200 Subject: [PATCH 008/113] update bwameme --- modules.json | 2 +- modules/nf-core/bwameme/mem/main.nf | 13 ++-- modules/nf-core/bwameme/mem/meta.yml | 3 + .../nf-core/bwameme/mem/tests/main.nf.test | 15 +++-- .../bwameme/mem/tests/main.nf.test.snap | 65 +++++++++++++++++++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/modules.json b/modules.json index 1b731501..d6310623 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,7 @@ }, "bwameme/mem": { "branch": "master", - "git_sha": "79480293280ff4f10f30bdea1ddd903f223f8489", + "git_sha": "c3793385cf559bb60d33e6c3b0cb379a40b26602", "installed_by": ["modules"] }, "cadd": { diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf index db41316e..db91482f 100644 --- a/modules/nf-core/bwameme/mem/main.nf +++ b/modules/nf-core/bwameme/mem/main.nf @@ -12,6 +12,7 @@ process BWAMEME_MEM { tuple val(meta2), path(index) tuple val(meta3), path(fasta) val sort_bam + 
val mbuffer output: tuple val(meta), path("*.sam") , emit: sam , optional:true @@ -29,14 +30,14 @@ process BWAMEME_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' - def mbuffer_mem = 3072 - if (!task.memory) { - log.info '[bwameme-mbuffer] Available memory not known - defaulting to 3GB for mbuffer. Specify process memory requirements to change this.' + if (!mbuffer) { + log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3072MB for mbuffer.' + mbuffer_mem = 3072 } else { - mbuffer_mem = (task.memory.mega*0.5).intValue() + mbuffer_mem = mbuffer } - def mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" - def mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" + mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" + mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension_matcher = (args2 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? 
extension_matcher[0][2].toLowerCase() : "bam" diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml index c7eb7b28..85a8b5b3 100644 --- a/modules/nf-core/bwameme/mem/meta.yml +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -52,6 +52,9 @@ input: type: boolean description: use samtools sort (true) or samtools view (false) pattern: "true or false" + - mbuffer: + type: integer + description: memory for mbuffer in megabytes output: - meta: diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test index 3b67b39e..961d6379 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { tag "bwameme/index" config "./nextflow.config" - test("sarscov2 - fastq, index, fasta, false") { + test("sarscov2 - fastq, index, fasta, false, 0") { setup { run("BWAMEME_INDEX") { @@ -38,6 +38,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false + input[4] = 0 """ } } @@ -54,7 +55,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, fasta, true") { + test("sarscov2 - fastq, index, fasta, true, 2048") { setup { run("BWAMEME_INDEX") { @@ -81,6 +82,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } @@ -97,7 +99,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0") { setup { run("BWAMEME_INDEX") { @@ -127,6 +129,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true)]]) input[3] = false + input[4] = 0 """ } } @@ -143,7 +146,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048") { setup { run("BWAMEME_INDEX") { @@ -173,6 +176,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } @@ -189,7 +193,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub") { options "-stub" @@ -221,6 +225,7 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true + input[4] = 2048 """ } } diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap index 281011ae..a8ff281b 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -1,4 +1,17 @@ { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:31.035368735" + }, "sarscov2 - [fastq1, fastq2], index, fasta, false": { "content": [ "test.bam", @@ -25,6 +38,19 @@ }, "timestamp": "2024-05-15T19:28:46.895668666" }, + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:08.497131484" + }, "sarscov2 - [fastq1, fastq2], index, fasta, true": { 
"content": [ "test.bam", @@ -38,6 +64,32 @@ }, "timestamp": "2024-05-15T20:44:56.510177191" }, + "sarscov2 - fastq, index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:16:23.45126091" + }, + "sarscov2 - fastq, index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:16:46.541148031" + }, "sarscov2 - fastq, index, fasta, false": { "content": [ "test.bam", @@ -63,5 +115,18 @@ "nextflow": "23.10.1" }, "timestamp": "2024-05-15T20:44:05.2657749" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:40.514767321" } } \ No newline at end of file From 03a71f6fc409996f5f4ead0f3f828e84b0db6708 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 18 Jun 2024 14:03:25 +0200 Subject: [PATCH 009/113] update bwameme --- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ subworkflows/local/align.nf | 2 ++ subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 3 ++- workflows/raredisease.nf | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 928d3a8c..b6defbc2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,6 +53,7 @@ params { // Alignment aligner = 'bwamem2' + mbuffer_mem = 3072 min_trimmed_length = 40 mt_subsample_rd = 150 mt_subsample_seed = 30 diff --git a/nextflow_schema.json b/nextflow_schema.json index 80cdfd11..47f36d06 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -605,6 +605,13 @@ "fa_icon": "fas 
fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, + "mbuffer_mem": { + "type": "integer", + "default": 3072, + "description": "Memory allocated for mbuffer in megabytes (used only by bwameme)", + "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", + "fa_icon": "fas fa-less-than" + }, "min_trimmed_length": { "type": "integer", "default": 40, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 8822d1cb..db7b0bfc 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -25,6 +25,7 @@ workflow ALIGN { ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_mbuffer_mem // integer: [mandatory] memory in megabytes val_platform // string: [mandatory] illumina or a different technology main: @@ -56,6 +57,7 @@ workflow ALIGN { ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_fai, + val_mbuffer_mem, val_platform ) ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 7d635d51..b11f589f 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -21,6 +21,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_bwameme_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_mbuffer_mem // integer: [mandatory] default: 3072 val_platform // string: [mandatory] default: illumina main: @@ -32,7 +33,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align = BWA.out.bam ch_versions = 
ch_versions.mix(BWA.out.versions.first()) } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true ) + BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem ) ch_align = BWAMEME_MEM.out.bam ch_versions = ch_versions.mix(BWAMEME_MEM.out.versions.first()) } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ab09b6f2..ee00ee66 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -370,6 +370,7 @@ workflow RAREDISEASE { ch_mtshift_fasta, ch_mtshift_dictionary, ch_mtshift_fai, + params.mbuffer_mem, params.platform ) .set { ch_mapped } From 98ed2cdc3ea0ac0d4582ce330c307417cad4cd40 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:53:36 +0200 Subject: [PATCH 010/113] remove readcount interval generation --- nextflow_schema.json | 2 +- subworkflows/local/prepare_references.nf | 30 +++++++++------------ workflows/raredisease.nf | 34 +++++++++++++----------- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 47f36d06..032569bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -281,7 +281,7 @@ "fa_icon": "fas fa-file", "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", "format": "file-path", - "help_text": "Generated by GATK4 preprocessintervals. If absent, pipeline can generate this file." + "help_text": "Generated by GATK4 preprocessintervals." 
}, "reduced_penetrance": { "type": "string", diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 2bd4b6dd..36445800 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -31,13 +31,14 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul workflow PREPARE_REFERENCES { take: - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] - ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] - ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] - ch_target_bed // channel: [mandatory for WES] [ path(bed) ] - ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(fai) ] + ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] + ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] + ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] + ch_target_bed // channel: [mandatory for WES] [ path(bed) ] + ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] main: ch_versions = Channel.empty() @@ -49,7 +50,8 @@ workflow PREPARE_REFERENCES { // Genome indices SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) GATK_SD(ch_genome_fasta) - ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() + ch_dict = Channel.empty().mix(ch_genome_dictionary, GATK_SD.out.dict).collect() GET_CHROM_SIZES( ch_fai ) // Genome 
alignment indices @@ -89,7 +91,7 @@ workflow PREPARE_REFERENCES { TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } // Generate bait and target intervals - GATK_BILT(ch_target_bed, GATK_SD.out.dict).interval_list + GATK_BILT(ch_target_bed, ch_dict).interval_list GATK_ILT(GATK_BILT.out.interval_list) GATK_ILT.out.interval_list .collect{ it[1] } @@ -101,10 +103,6 @@ workflow PREPARE_REFERENCES { CAT_CAT_BAIT ( ch_bait_intervals_cat_in ) UNTAR_VEP_CACHE (ch_vep_cache) - //cnvcalling intervals - GATK_PREPROCESS_WGS (ch_genome_fasta, ch_fai, GATK_SD.out.dict, [[],[]], [[],[]]).set {ch_preprocwgs} - GATK_PREPROCESS_WES (ch_genome_fasta, ch_fai, GATK_SD.out.dict, GATK_BILT.out.interval_list, [[],[]]).set {ch_preprocwes} - // RTG tools ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] } .set {ch_rtgformat_in} @@ -134,8 +132,6 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(GATK_ILT.out.versions) ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions) ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) - ch_versions = ch_versions.mix(GATK_PREPROCESS_WGS.out.versions) - ch_versions = ch_versions.mix(GATK_PREPROCESS_WES.out.versions) ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions) emit: @@ -144,9 +140,7 @@ workflow PREPARE_REFERENCES { genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] genome_fai = ch_fai // channel: [ val(meta), path(fai) ] - genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] - readcount_intervals = Channel.empty() - .mix(ch_preprocwgs.interval_list,ch_preprocwes.interval_list)// channel: [ path(intervals) ] + genome_dict = ch_dict // channel: [ val(meta), path(dict) ] sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ] mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ] mtshift_intervals = 
ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ee00ee66..bc5fef99 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -58,7 +58,7 @@ if (params.variant_caller.equals("sentieon")) { } if (!params.skip_germlinecnvcaller) { - mandatoryParams += ["ploidy_model", "gcnvcaller_model"] + mandatoryParams += ["ploidy_model", "gcnvcaller_model", "readcount_intervals"] } if (!params.skip_vep_filter) { @@ -171,24 +171,27 @@ workflow RAREDISEASE { ch_case_info = ch_samples.toList().map { CustomFunctions.createCaseChannel(it) } // Initialize file channels for PREPARE_REFERENCES subworkflow - ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) + ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_genome_dictionary = params.sequence_dictionary ? 
Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) // Prepare references and indices. PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, + ch_genome_dictionary, ch_mt_fasta, ch_gnomad_af_tab, ch_dbsnp, @@ -220,8 +223,7 @@ workflow RAREDISEASE { : ch_references.genome_bwameme_index ch_genome_chrsizes = ch_references.genome_chrom_sizes ch_genome_fai = ch_references.genome_fai - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.genome_dict + ch_genome_dictionary = ch_references.genome_dict ch_gens_gnomad_pos = params.gens_gnomad_pos ? Channel.fromPath(params.gens_gnomad_pos).collect() : Channel.empty() ch_gens_interval_list = params.gens_interval_list ? 
Channel.fromPath(params.gens_interval_list).collect() From 27cf8fb946790b34186581b4f1a1b099119110f6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 17:48:21 +0200 Subject: [PATCH 011/113] update module --- modules.json | 2 +- modules/nf-core/bwameme/mem/main.nf | 13 ++- modules/nf-core/bwameme/mem/meta.yml | 5 +- .../nf-core/bwameme/mem/tests/main.nf.test | 15 ++- .../bwameme/mem/tests/main.nf.test.snap | 108 +++++++++++++++--- 5 files changed, 118 insertions(+), 25 deletions(-) diff --git a/modules.json b/modules.json index d6310623..4322c732 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,7 @@ }, "bwameme/mem": { "branch": "master", - "git_sha": "c3793385cf559bb60d33e6c3b0cb379a40b26602", + "git_sha": "0aa157a00b54bcbe2c50be375cafd68d928e7f4d", "installed_by": ["modules"] }, "cadd": { diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf index db91482f..2efc8c0f 100644 --- a/modules/nf-core/bwameme/mem/main.nf +++ b/modules/nf-core/bwameme/mem/main.nf @@ -13,6 +13,7 @@ process BWAMEME_MEM { tuple val(meta3), path(fasta) val sort_bam val mbuffer + val samtools_threads output: tuple val(meta), path("*.sam") , emit: sam , optional:true @@ -31,13 +32,19 @@ process BWAMEME_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' if (!mbuffer) { - log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3072MB for mbuffer.' + log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3GB for mbuffer.' mbuffer_mem = 3072 } else { mbuffer_mem = mbuffer } + if (!samtools_threads) { + log.info 'Number of threads for samtools is not set - defaulting to 2 threads.' + threads = 2 + } else { + threads = samtools_threads + } mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" - mem_per_thread = sort_bam ? 
"-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" + mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/threads).intValue()+"M" : "" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension_matcher = (args2 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" @@ -54,7 +61,7 @@ process BWAMEME_MEM { \$INDEX \\ $reads \\ $mbuffer_command \\ - | samtools $samtools_command $args2 $mem_per_thread -@ $task.cpus ${reference} -o ${prefix}.${extension} - + | samtools $samtools_command $args2 $mem_per_thread -@ $threads ${reference} -o ${prefix}.${extension} - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml index 85a8b5b3..e5d28db2 100644 --- a/modules/nf-core/bwameme/mem/meta.yml +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -54,7 +54,10 @@ input: pattern: "true or false" - mbuffer: type: integer - description: memory for mbuffer in megabytes + description: memory for mbuffer in megabytes (default 3072) + - sort_threads: + type: integer + description: number of threads to used during samtools sort (default 2). 
output: - meta: diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test index 961d6379..8175f58a 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { tag "bwameme/index" config "./nextflow.config" - test("sarscov2 - fastq, index, fasta, false, 0") { + test("sarscov2 - fastq, index, fasta, false, 0, 4") { setup { run("BWAMEME_INDEX") { @@ -39,6 +39,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false input[4] = 0 + input[5] = 4 """ } } @@ -55,7 +56,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, fasta, true, 2048") { + test("sarscov2 - fastq, index, fasta, true, 2048, 4") { setup { run("BWAMEME_INDEX") { @@ -83,6 +84,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = 4 """ } } @@ -99,7 +101,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4") { setup { run("BWAMEME_INDEX") { @@ -130,6 +132,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false input[4] = 0 + input[5] = 4 """ } } @@ -146,7 +149,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''") { setup { run("BWAMEME_INDEX") { @@ -177,6 +180,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = "" """ } } @@ -193,7 
+197,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub") { options "-stub" @@ -226,6 +230,7 @@ nextflow_process { input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true input[4] = 2048 + input[5] = 4 """ } } diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap index a8ff281b..55235959 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4": { "content": [ "test.bam", [ @@ -10,7 +10,7 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:31.035368735" + "timestamp": "2024-06-20T16:07:51.065498711" }, "sarscov2 - [fastq1, fastq2], index, fasta, false": { "content": [ @@ -25,7 +25,33 @@ }, "timestamp": "2024-05-15T20:04:31.962017214" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:08:18.378362535" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:25:43.613918051" + }, + "sarscov2 - fastq, index, fasta, false": { "content": [ "test.bam", [ @@ -36,9 +62,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T19:28:46.895668666" + "timestamp": "2024-05-15T20:00:05.782384898" 
}, - "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { "content": [ "test.bam", [ @@ -49,9 +75,35 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:08.497131484" + "timestamp": "2024-06-18T10:17:40.514767321" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:31.035368735" + }, + "sarscov2 - fastq, index, fasta, true, 2048, 4": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T16:07:24.071789902" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { "content": [ "test.bam", [ @@ -62,7 +114,20 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T20:44:56.510177191" + "timestamp": "2024-05-15T19:28:46.895668666" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-18T10:17:08.497131484" }, "sarscov2 - fastq, index, fasta, false, 0": { "content": [ @@ -77,6 +142,19 @@ }, "timestamp": "2024-06-18T10:16:23.45126091" }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:44:56.510177191" + }, "sarscov2 - fastq, index, fasta, true, 2048": { "content": [ "test.bam", @@ -90,7 +168,7 @@ }, "timestamp": "2024-06-18T10:16:46.541148031" }, - "sarscov2 - fastq, index, 
fasta, false": { + "sarscov2 - fastq, index, fasta, true": { "content": [ "test.bam", [ @@ -101,9 +179,9 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-15T20:00:05.782384898" + "timestamp": "2024-05-15T20:44:05.2657749" }, - "sarscov2 - fastq, index, fasta, true": { + "sarscov2 - fastq, index, fasta, false, 0, 4": { "content": [ "test.bam", [ @@ -112,11 +190,11 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-05-15T20:44:05.2657749" + "timestamp": "2024-06-20T16:06:58.802149967" }, - "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": { + "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub": { "content": [ "test.bam", [ @@ -127,6 +205,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - "timestamp": "2024-06-18T10:17:40.514767321" + "timestamp": "2024-06-20T16:08:28.453969552" } } \ No newline at end of file From 481f9f53191d2546609975a3d57326713043754a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:14:49 +0200 Subject: [PATCH 012/113] add sort threads option --- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ subworkflows/local/align.nf | 4 +++- subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 4 ++-- workflows/raredisease.nf | 3 ++- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index b6defbc2..f109b654 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,6 +54,7 @@ params { // Alignment aligner = 'bwamem2' mbuffer_mem = 3072 + samtools_sort_threads = 4 min_trimmed_length = 40 mt_subsample_rd = 150 mt_subsample_seed = 30 diff --git a/nextflow_schema.json b/nextflow_schema.json index 032569bf..7004ba9e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -605,6 +605,13 @@ "fa_icon": "fas fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, + "samtools_sort_threads": { + "type": 
"integer", + "default": 4, + "description": "Number of threads allocated for sorting alignment files (used only by bwameme)", + "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", + "fa_icon": "fas fa-less-than" + }, "mbuffer_mem": { "type": "integer", "default": 3072, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index db7b0bfc..2163767c 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -27,6 +27,7 @@ workflow ALIGN { ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] val_mbuffer_mem // integer: [mandatory] memory in megabytes val_platform // string: [mandatory] illumina or a different technology + val_sort_threads // integer: [mandatory] memory in megabytes main: ch_bwamem2_bam = Channel.empty() @@ -58,7 +59,8 @@ workflow ALIGN { ch_genome_fasta, ch_genome_fai, val_mbuffer_mem, - val_platform + val_platform, + val_sort_threads ) ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index b11f589f..15d3db9a 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -23,7 +23,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] val_mbuffer_mem // integer: [mandatory] default: 3072 val_platform // string: [mandatory] default: illumina - + val_sort_threads // integer: [mandatory] default: 4 main: ch_versions = Channel.empty() @@ -33,7 +33,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align = BWA.out.bam ch_versions = ch_versions.mix(BWA.out.versions.first()) } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, 
ch_genome_fasta, true, val_mbuffer_mem ) + BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem, val_sort_threads ) ch_align = BWAMEME_MEM.out.bam ch_versions = ch_versions.mix(BWAMEME_MEM.out.versions.first()) } else { diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index bc5fef99..361bbee2 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -373,7 +373,8 @@ workflow RAREDISEASE { ch_mtshift_dictionary, ch_mtshift_fai, params.mbuffer_mem, - params.platform + params.platform, + params.samtools_sort_threads ) .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) From 5296b877a37249ed2de4ce3d0667a164731aa12a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:44:49 +0200 Subject: [PATCH 013/113] update changelog --- CHANGELOG.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67fdb977..e29858c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,16 +3,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 2.2.0 - Dogmatix [XXXX-XX-XX] +## 2.2.0dev - Dogmatix [XXXX-XX-XX] ### `Added` +- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#564](https://github.com/nf-core/raredisease/pull/564) + ### `Changed` +- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. 
[#564](https://github.com/nf-core/raredisease/pull/564) + ### `Fixed` +- Docker manifest error from gnu-wget container [#564](https://github.com/nf-core/raredisease/pull/564) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------- | +| | mbuffer_mem | +| | samtools_sort_threads | + ## 2.1.0 - Obelix [2024-05-29] ### `Added` From 9c74555c1d21d7e0290ad0962f0b9f5a68d21801 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:47:31 +0200 Subject: [PATCH 014/113] update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e29858c7..8e998348 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,15 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#564](https://github.com/nf-core/raredisease/pull/564) +- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) ### `Changed` -- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. [#564](https://github.com/nf-core/raredisease/pull/564) +- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) ### `Fixed` -- Docker manifest error from gnu-wget container [#564](https://github.com/nf-core/raredisease/pull/564) +- Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) ### Parameters From 798a150a6ed8ce06f95c14d9b3c9b903bae1106d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:31:34 +0200 Subject: [PATCH 015/113] fix error [skip ci] --- conf/modules/prepare_references.config | 15 --------------- subworkflows/local/prepare_references.nf | 2 -- 2 files changed, 17 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 9ec4d47b..10fda4a3 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -124,19 +124,4 @@ process { ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) } } - withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WGS' { - ext.args = { [ - '--padding 0', - '--interval-merging-rule OVERLAPPING_ONLY', - "--exclude-intervals ${params.mito_name}", - "--tmp-dir ./" - ].join(' ') } - ext.when = { params.analysis_type.equals("wgs") && !params.readcount_intervals } - } - - withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WES' { - ext.args = { "--bin-length 0 --interval-merging-rule OVERLAPPING_ONLY --exclude-intervals ${params.mito_name}" } - ext.when = { params.analysis_type.equals("wes") && !params.readcount_intervals } - } - } diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 36445800..7c7726b4 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -13,8 +13,6 @@ include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modul include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from 
'../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WGS } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf' -include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WES } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf' include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main' From 3cbe7a77afe26bd0ee04ecd10439cb1ceaf7002a Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 21:33:44 +0200 Subject: [PATCH 016/113] remove module --- modules.json | 5 -- .../gatk4/preprocessintervals/environment.yml | 7 -- .../nf-core/gatk4/preprocessintervals/main.nf | 62 -------------- .../gatk4/preprocessintervals/meta.yml | 82 ------------------- 4 files changed, 156 deletions(-) delete mode 100644 modules/nf-core/gatk4/preprocessintervals/environment.yml delete mode 100644 modules/nf-core/gatk4/preprocessintervals/main.nf delete mode 100644 modules/nf-core/gatk4/preprocessintervals/meta.yml diff --git a/modules.json b/modules.json index 4322c732..e400f22d 100644 --- a/modules.json +++ b/modules.json @@ -206,11 +206,6 @@ "git_sha": "cf607b7749da0a8f5ca2a1e31233e13e3159e2fe", "installed_by": ["modules"] }, - "gatk4/preprocessintervals": { - "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] - }, "gatk4/printreads": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml 
b/modules/nf-core/gatk4/preprocessintervals/environment.yml deleted file mode 100644 index ec0b09e9..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: gatk4_preprocessintervals -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf deleted file mode 100644 index dffc4bb1..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process GATK4_PREPROCESSINTERVALS { - tag "$fasta" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) - tuple val(meta3), path(dict) - tuple val(meta4), path(intervals) - tuple val(meta5), path(exclude_intervals) - - output: - tuple val(meta), path("*.interval_list"), emit: interval_list - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def include_command = intervals ? "--intervals $intervals" : "" - def exclude_command = exclude_intervals ? "--exclude-intervals $exclude_intervals" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - """ - gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ - PreprocessIntervals \\ - $include_command \\ - $exclude_command \\ - --reference $fasta \\ - --output ${prefix}.interval_list \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.interval_list - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml deleted file mode 100644 index cf3f6ac4..00000000 --- a/modules/nf-core/gatk4/preprocessintervals/meta.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: "gatk4_preprocessintervals" -description: Prepares bins for coverage collection. -keywords: - - bed - - gatk4 - - interval - - preprocessintervals -tools: - - "gatk4": - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: "10.1158/1538-7445.AM2017-3590" - licence: ["Apache-2.0"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional) - pattern: "*.{bed,interval_list}" - - meta5: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - exclude_intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) - pattern: "*.{bed,interval_list}" -output: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - interval_list: - type: file - description: Processed interval list file - pattern: "*.{bed,interval_list}" -authors: - - "@ryanjameskennedy" - - "@ViktorHy" - - "@ramprasadn" -maintainers: - - "@ryanjameskennedy" - - "@ViktorHy" - - "@ramprasadn" From 306d03b8ba4885197a94a18a0b3e44497c14a658 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:08:10 +0200 Subject: [PATCH 017/113] skip repeat analysis --- nextflow.config | 3 +-- nextflow_schema.json | 5 +++++ workflows/raredisease.nf | 22 ++++++++++++---------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 78a92109..66588359 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,10 +38,9 @@ params { skip_me_annotation = false skip_mt_annotation = false skip_qualimap = false + skip_repeat_analysis = 
false skip_snv_annotation = false skip_sv_annotation = false - skip_me_annotation = false - skip_mt_annotation = false skip_mt_subsample = false skip_vcf2cytosure = true skip_vep_filter = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 7004ba9e..4a7a459d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -568,6 +568,11 @@ "description": "Specifies whether or not to subsample mt alignment.", "fa_icon": "fas fa-toggle-on" }, + "skip_repeat_analysis": { + "type": "boolean", + "description": "Specifies whether or not to skip calling and annotation of repeat expansions.", + "fa_icon": "fas fa-toggle-on" + }, "skip_snv_annotation": { "type": "boolean", "description": "Specifies whether or not to skip annotate SNV subworkflow.", diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 361bbee2..e9ef671c 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -258,7 +258,7 @@ workflow RAREDISEASE { ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() - : ( ch_references.readcount_intervals ?: Channel.empty() ) + : Channel.empty() ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) ch_rtg_truthvcfs = params.rtg_truthvcfs ? 
Channel.fromPath(params.rtg_truthvcfs).collect() @@ -409,15 +409,17 @@ workflow RAREDISEASE { // // EXPANSIONHUNTER AND STRANGER // - if (params.analysis_type.equals("wgs")) { - CALL_REPEAT_EXPANSIONS ( - ch_mapped.genome_bam_bai, - ch_variant_catalog, - ch_case_info, - ch_genome_fasta, - ch_genome_fai - ) - ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + if (!params.skip_repeat_analysis) { + if ( params.analysis_type.equals("wgs") ) { + CALL_REPEAT_EXPANSIONS ( + ch_mapped.genome_bam_bai, + ch_variant_catalog, + ch_case_info, + ch_genome_fasta, + ch_genome_fai + ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + } } // From aa4c0e651a658541b56a3a1614a81ccaa9eda88d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:09:52 +0200 Subject: [PATCH 018/113] fix error --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 361bbee2..2b6e3483 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -258,7 +258,7 @@ workflow RAREDISEASE { ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() - : ( ch_references.readcount_intervals ?: Channel.empty() ) + : Channel.empty() ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() : Channel.value([]) ch_rtg_truthvcfs = params.rtg_truthvcfs ? 
Channel.fromPath(params.rtg_truthvcfs).collect() From 74087e2ea0594044b95cf2776d9c414e4d786f96 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:38:54 +0200 Subject: [PATCH 019/113] add skip_snv_calling --- nextflow_schema.json | 5 + workflows/raredisease.nf | 274 +++++++++++++++++++++------------------ 2 files changed, 151 insertions(+), 128 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4a7a459d..557b1ec2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -578,6 +578,11 @@ "description": "Specifies whether or not to skip annotate SNV subworkflow.", "fa_icon": "fas fa-toggle-on" }, + "skip_snv_calling": { + "type": "boolean", + "description": "Specifies whether or not to skip nuclear and mitochondrial SNV calling and annotation.", + "fa_icon": "fas fa-toggle-on" + }, "skip_sv_annotation": { "type": "boolean", "description": "Specifies whether or not to skip annotate structural variant subworkflow.", diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index e9ef671c..26c56682 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -355,9 +355,12 @@ workflow RAREDISEASE { ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty() - // - // ALIGNING READS, FETCH STATS, AND MERGE. 
- // +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ALIGN & FETCH STATS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + ALIGN ( ch_samplesheet, ch_genome_fasta, @@ -406,9 +409,12 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(QC_BAM.out.versions) - // - // EXPANSIONHUNTER AND STRANGER - // +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CALL AND ANNOTATE REPEAT EXPANSIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + if (!params.skip_repeat_analysis) { if ( params.analysis_type.equals("wgs") ) { CALL_REPEAT_EXPANSIONS ( @@ -422,49 +428,133 @@ workflow RAREDISEASE { } } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SNVs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + if (!params.skip_snv_calling) { + CALL_SNV ( + ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, + ch_genome_chrsizes, + ch_genome_fasta, + ch_genome_fai, + ch_genome_dictionary, + ch_mt_intervals, + ch_mtshift_fasta, + ch_mtshift_fai, + ch_mtshift_dictionary, + ch_mtshift_intervals, + ch_mtshift_backchain, + ch_dbsnp, + ch_dbsnp_tbi, + ch_call_interval, + ch_ml_model, + ch_case_info, + ch_foundin_header, + Channel.value(params.sentieon_dnascope_pcr_indel_model) + ) + ch_versions = ch_versions.mix(CALL_SNV.out.versions) + // - // SNV CALLING + // ANNOTATE GENOME SNVs // - CALL_SNV ( - ch_mapped.genome_bam_bai, - ch_mapped.mt_bam_bai, - ch_mapped.mtshift_bam_bai, - ch_genome_chrsizes, - ch_genome_fasta, - ch_genome_fai, - ch_genome_dictionary, - ch_mt_intervals, - ch_mtshift_fasta, - ch_mtshift_fai, - ch_mtshift_dictionary, - ch_mtshift_intervals, - ch_mtshift_backchain, - ch_dbsnp, - ch_dbsnp_tbi, - ch_call_interval, - ch_ml_model, 
- ch_case_info, - ch_foundin_header, - Channel.value(params.sentieon_dnascope_pcr_indel_model) - ) - ch_versions = ch_versions.mix(CALL_SNV.out.versions) + + if (!params.skip_snv_annotation) { + + ANNOTATE_GENOME_SNVS ( + CALL_SNV.out.genome_vcf_tabix, + params.analysis_type, + ch_cadd_header, + ch_cadd_resources, + ch_vcfanno_resources, + ch_vcfanno_lua, + ch_vcfanno_toml, + params.genome, + params.vep_cache_version, + ch_vep_cache, + ch_genome_fasta, + ch_gnomad_af, + ch_samples, + ch_scatter_split_intervals, + ch_vep_extra_files, + ch_genome_chrsizes + ).set { ch_snv_annotate } + ch_versions = ch_versions.mix(ch_snv_annotate.versions) + + GENERATE_CLINICAL_SET_SNV( + ch_snv_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions) + + ANN_CSQ_PLI_SNV ( + GENERATE_CLINICAL_SET_SNV.out.vcf, + ch_variant_consequences_snv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) + + RANK_VARIANTS_SNV ( + ANN_CSQ_PLI_SNV.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_snv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) + } // - // VARIANT EVALUATION + // ANNOTATE MT SNVs // - if (params.run_rtgvcfeval) { - VARIANT_EVALUATION ( - CALL_SNV.out.genome_vcf_tabix, - ch_genome_fai, - ch_rtg_truthvcfs, - ch_sdf - ) - ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions) + + if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) { + + ANNOTATE_MT_SNVS ( + CALL_SNV.out.mt_vcf, + CALL_SNV.out.mt_tabix, + ch_cadd_header, + ch_cadd_resources, + ch_genome_fasta, + ch_vcfanno_resources, + ch_vcfanno_toml, + params.genome, + params.vep_cache_version, + ch_vep_cache, + ch_vep_extra_files + ).set { ch_mt_annotate } + ch_versions = ch_versions.mix(ch_mt_annotate.versions) + + GENERATE_CLINICAL_SET_MT( + ch_mt_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions) + + ANN_CSQ_PLI_MT( + 
GENERATE_CLINICAL_SET_MT.out.vcf, + ch_variant_consequences_snv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions) + + RANK_VARIANTS_MT ( + ANN_CSQ_PLI_MT.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_mt + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions) + } } - // - // SV CALLING - // +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SVs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + CALL_STRUCTURAL_VARIANTS ( ch_mapped.genome_marked_bam, ch_mapped.genome_marked_bai, @@ -524,94 +614,8 @@ workflow RAREDISEASE { } - // - // ANNOTATE GENOME SNVs - // - if (!params.skip_snv_annotation) { - ANNOTATE_GENOME_SNVS ( - CALL_SNV.out.genome_vcf_tabix, - params.analysis_type, - ch_cadd_header, - ch_cadd_resources, - ch_vcfanno_resources, - ch_vcfanno_lua, - ch_vcfanno_toml, - params.genome, - params.vep_cache_version, - ch_vep_cache, - ch_genome_fasta, - ch_gnomad_af, - ch_samples, - ch_scatter_split_intervals, - ch_vep_extra_files, - ch_genome_chrsizes - ).set { ch_snv_annotate } - ch_versions = ch_versions.mix(ch_snv_annotate.versions) - - GENERATE_CLINICAL_SET_SNV( - ch_snv_annotate.vcf_ann, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions) - ANN_CSQ_PLI_SNV ( - GENERATE_CLINICAL_SET_SNV.out.vcf, - ch_variant_consequences_snv - ) - ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) - - RANK_VARIANTS_SNV ( - ANN_CSQ_PLI_SNV.out.vcf_ann, - ch_pedfile, - ch_reduced_penetrance, - ch_score_config_snv - ) - ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) - - } - - // - // ANNOTATE MT SNVs - // - if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) { - - ANNOTATE_MT_SNVS ( - CALL_SNV.out.mt_vcf, - CALL_SNV.out.mt_tabix, - ch_cadd_header, - ch_cadd_resources, - ch_genome_fasta, - 
ch_vcfanno_resources, - ch_vcfanno_toml, - params.genome, - params.vep_cache_version, - ch_vep_cache, - ch_vep_extra_files - ).set { ch_mt_annotate } - ch_versions = ch_versions.mix(ch_mt_annotate.versions) - - GENERATE_CLINICAL_SET_MT( - ch_mt_annotate.vcf_ann, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions) - - ANN_CSQ_PLI_MT( - GENERATE_CLINICAL_SET_MT.out.vcf, - ch_variant_consequences_snv - ) - ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions) - - RANK_VARIANTS_MT ( - ANN_CSQ_PLI_MT.out.vcf_ann, - ch_pedfile, - ch_reduced_penetrance, - ch_score_config_mt - ) - ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions) - - } // STEP 1.7: SMNCOPYNUMBERCALLER RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output @@ -712,6 +716,20 @@ workflow RAREDISEASE { } } + + // + // VARIANT EVALUATION + // + if (params.run_rtgvcfeval) { + VARIANT_EVALUATION ( + CALL_SNV.out.genome_vcf_tabix, + ch_genome_fai, + ch_rtg_truthvcfs, + ch_sdf + ) + ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions) + } + // // Collate and save software versions // From a27392daf7a83c6a78746171335e9225312b36f9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:47:01 +0200 Subject: [PATCH 020/113] add to nextflow.config --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 66588359..5d4bda28 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,6 +39,7 @@ params { skip_mt_annotation = false skip_qualimap = false skip_repeat_analysis = false + skip_snv_calling = false skip_snv_annotation = false skip_sv_annotation = false skip_mt_subsample = false From e39c099469cd8e868d7d52d5f7a7bf1c1c2eb537 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:58:25 +0200 Subject: [PATCH 021/113] reorder --- nextflow.config | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 5d4bda28..03c81529 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,8 +39,8 @@ params { skip_mt_annotation = false skip_qualimap = false skip_repeat_analysis = false - skip_snv_calling = false skip_snv_annotation = false + skip_snv_calling = false skip_sv_annotation = false skip_mt_subsample = false skip_vcf2cytosure = true From 7cc4bda2072f1ea4441adc520ade74e98a860872 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:01:43 +0200 Subject: [PATCH 022/113] reorder sv bits --- nextflow.config | 1 + nextflow_schema.json | 5 + workflows/raredisease.nf | 257 +++++++++++++++++++++++---------------- 3 files changed, 160 insertions(+), 103 deletions(-) diff --git a/nextflow.config b/nextflow.config index 03c81529..9ab21daa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { skip_snv_annotation = false skip_snv_calling = false skip_sv_annotation = false + skip_sv_calling = false skip_mt_subsample = false skip_vcf2cytosure = true skip_vep_filter = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 557b1ec2..8f1b9df8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -588,6 +588,11 @@ "description": "Specifies whether or not to skip annotate structural variant subworkflow.", "fa_icon": "fas fa-toggle-on" }, + "skip_sv_calling": { + "type": "boolean", + "description": "Specifies whether or not to skip nuclear and mitochondrial SV calling and annotation.", + "fa_icon": "fas fa-toggle-on" + }, "skip_vcf2cytosure": { "type": "boolean", "default": true, diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 26c56682..06dd9a23 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -170,7 +170,9 @@ workflow RAREDISEASE { ch_samples = ch_samplesheet.map { meta, fastqs -> meta} ch_case_info = ch_samples.toList().map { 
CustomFunctions.createCaseChannel(it) } + // // Initialize file channels for PREPARE_REFERENCES subworkflow + // ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() @@ -187,7 +189,9 @@ workflow RAREDISEASE { ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() : Channel.value([[],[]]) + // // Prepare references and indices. + // PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, @@ -200,7 +204,9 @@ workflow RAREDISEASE { ) .set { ch_references } + // // Gather built indices or get them from the params + // ch_bait_intervals = ch_references.bait_intervals ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() @@ -303,19 +309,31 @@ workflow RAREDISEASE { : Channel.empty() ch_versions = ch_versions.mix(ch_references.versions) + // // SV caller priority + // if (params.skip_germlinecnvcaller) { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + if (params.analysis_type.equals("wgs")) { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) + } else { + ch_svcaller_priority = Channel.value(["manta"]) } else { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) + if (params.analysis_type.equals("wgs")) { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) + } else { + ch_svcaller_priority = Channel.value(["manta", "gcnvcaller"]) + } } - + // // Generate pedigree file + // ch_pedfile = CREATE_PEDIGREE_FILE(ch_samples.toList()).ped ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions) + // // Read and store paths in the vep_plugin_files file + // if 
(params.vep_plugin_files) { ch_vep_extra_files_unsplit.splitCsv ( header:true ) .map { row -> @@ -330,7 +348,9 @@ workflow RAREDISEASE { .set {ch_vep_extra_files} } - // Read and store hgnc ids in a channel + // + // Dump all HGNC ids in a file + // ch_vep_filters_scout_fmt .mix (ch_vep_filters_std_fmt) .set {ch_vep_filters} @@ -339,13 +359,17 @@ workflow RAREDISEASE { .txt .set {ch_hgnc_ids} + // // Input QC + // if (!params.skip_fastqc) { FASTQC (ch_samplesheet) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) } - // CREATE CHROMOSOME BED AND INTERVALS + // + // Create chromosome bed and intervals for splitting and gathering operations + // SCATTER_GENOME ( ch_genome_dictionary, ch_genome_fai, @@ -551,73 +575,121 @@ workflow RAREDISEASE { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SVs + CALL AND ANNOTATE NUCLEAR SVs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - CALL_STRUCTURAL_VARIANTS ( - ch_mapped.genome_marked_bam, - ch_mapped.genome_marked_bai, - ch_mapped.genome_bam_bai, - ch_mapped.mt_bam_bai, - ch_mapped.mtshift_bam_bai, - ch_genome_bwaindex, - ch_genome_fasta, - ch_genome_fai, - ch_mtshift_fasta, - ch_case_info, - ch_target_bed, - ch_genome_dictionary, - ch_svcaller_priority, - ch_readcount_intervals, - ch_ploidy_model, - ch_gcnvcaller_model - ) - ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) + if (!params.skip_sv_calling) { + CALL_STRUCTURAL_VARIANTS ( + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, + ch_genome_bwaindex, + ch_genome_fasta, + ch_genome_fai, + ch_mtshift_fasta, + ch_case_info, + ch_target_bed, + ch_genome_dictionary, + ch_svcaller_priority, + ch_readcount_intervals, + ch_ploidy_model, + ch_gcnvcaller_model + ) + ch_versions = 
ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) // // ANNOTATE STRUCTURAL VARIANTS // - if (!params.skip_sv_annotation) { - ANNOTATE_STRUCTURAL_VARIANTS ( - CALL_STRUCTURAL_VARIANTS.out.vcf, - ch_sv_dbs, - ch_sv_bedpedbs, - params.genome, - params.vep_cache_version, - ch_vep_cache, - ch_genome_fasta, - ch_genome_dictionary, - ch_vep_extra_files - ).set { ch_sv_annotate } - ch_versions = ch_versions.mix(ch_sv_annotate.versions) + if (!params.skip_sv_annotation) { + ANNOTATE_STRUCTURAL_VARIANTS ( + CALL_STRUCTURAL_VARIANTS.out.vcf, + ch_sv_dbs, + ch_sv_bedpedbs, + params.genome, + params.vep_cache_version, + ch_vep_cache, + ch_genome_fasta, + ch_genome_dictionary, + ch_vep_extra_files + ).set { ch_sv_annotate } + ch_versions = ch_versions.mix(ch_sv_annotate.versions) - GENERATE_CLINICAL_SET_SV( - ch_sv_annotate.vcf_ann, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) + GENERATE_CLINICAL_SET_SV( + ch_sv_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) - ANN_CSQ_PLI_SV ( - GENERATE_CLINICAL_SET_SV.out.vcf, - ch_variant_consequences_sv - ) - ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) + ANN_CSQ_PLI_SV ( + GENERATE_CLINICAL_SET_SV.out.vcf, + ch_variant_consequences_sv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) + + RANK_VARIANTS_SV ( + ANN_CSQ_PLI_SV.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_sv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions) + } + } - RANK_VARIANTS_SV ( - ANN_CSQ_PLI_SV.out.vcf_ann, - ch_pedfile, - ch_reduced_penetrance, - ch_score_config_sv +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CALL AND ANNOTATE MOBILE ELEMENTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + if (!params.skip_me_calling || params.analysis_type.equals("wes")) { + CALL_MOBILE_ELEMENTS( + 
ch_mapped.genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_me_references, + ch_case_info, + params.genome ) - ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions) + ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) - } + if (!params.skip_me_annotation) { + ANNOTATE_MOBILE_ELEMENTS( + CALL_MOBILE_ELEMENTS.out.vcf, + ch_me_svdb_resources, + ch_genome_fasta, + ch_genome_dictionary, + ch_vep_cache, + params.genome, + params.vep_cache_version, + ch_vep_extra_files + ) + ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions) + GENERATE_CLINICAL_SET_ME( + ANNOTATE_MOBILE_ELEMENTS.out.vcf, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions ) + ANN_CSQ_PLI_ME( + GENERATE_CLINICAL_SET_ME.out.vcf, + ch_variant_consequences_sv + ) + ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions ) + + } + } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SMNCOPYNUMBERCALLER +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - // STEP 1.7: SMNCOPYNUMBERCALLER RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output .collect{it} .toList() @@ -640,7 +712,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions) ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) - // ped correspondence, sex check, ancestry check +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PEDDY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if (!params.skip_peddy) { PEDDY ( CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), @@ -649,7 +725,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(PEDDY.out.versions.first()) } - // Generate CGH files from sequencing data, turned off by default +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Generate CGH files from sequencing data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if ( !params.skip_vcf2cytosure && params.analysis_type != "wes" ) { GENERATE_CYTOSURE_FILES ( ch_sv_annotate.vcf_ann, @@ -661,7 +741,11 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GENERATE_CYTOSURE_FILES.out.versions) } - // GENS +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ if ( !params.skip_gens && params.analysis_type != "wes" ) { GENS ( ch_mapped.genome_bam_bai, @@ -678,48 +762,12 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(GENS.out.versions) } - if (!params.skip_me_calling) { - CALL_MOBILE_ELEMENTS( - ch_mapped.genome_bam_bai, - ch_genome_fasta, - ch_genome_fai, - ch_me_references, - ch_case_info, - params.genome - ) - ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) - - if (!params.skip_me_annotation) { - ANNOTATE_MOBILE_ELEMENTS( - CALL_MOBILE_ELEMENTS.out.vcf, - ch_me_svdb_resources, - ch_genome_fasta, - ch_genome_dictionary, - ch_vep_cache, - params.genome, - params.vep_cache_version, - ch_vep_extra_files - ) - ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions) - - GENERATE_CLINICAL_SET_ME( - ANNOTATE_MOBILE_ELEMENTS.out.vcf, - ch_hgnc_ids - ) - ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions ) - - ANN_CSQ_PLI_ME( - GENERATE_CLINICAL_SET_ME.out.vcf, - ch_variant_consequences_sv - ) - ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions ) - - } - } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VARIANT EVALUATION WITH RTGTOOLS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - // - // VARIANT EVALUATION - // if 
(params.run_rtgvcfeval) { VARIANT_EVALUATION ( CALL_SNV.out.genome_vcf_tabix, @@ -730,9 +778,12 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions) } - // - // Collate and save software versions - // +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COLLECT SOFTWARE VERSIONS & MultiQC +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", From ee2a11d46c0f2987aa5cf7fa0ae1272801c194cd Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:03:05 +0200 Subject: [PATCH 023/113] fix indents --- workflows/raredisease.nf | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 26c56682..fd2d759b 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -459,10 +459,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_SNV.out.versions) - // - // ANNOTATE GENOME SNVs - // - + // + // ANNOTATE GENOME SNVs + // if (!params.skip_snv_annotation) { ANNOTATE_GENOME_SNVS ( @@ -506,10 +505,9 @@ workflow RAREDISEASE { ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) } - // - // ANNOTATE MT SNVs - // - + // + // ANNOTATE MT SNVs + // if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) { ANNOTATE_MT_SNVS ( From 62206aed4105e7c839e793dc05bf4f7178699295 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:09:34 +0200 Subject: [PATCH 024/113] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e998348..b30c3e33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project 
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- `readcount_intervals` parameter is now manadatory for running germlinecnvcaller. [#570](https://github.com/nf-core/raredisease/pull/570) +- `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. [#570](https://github.com/nf-core/raredisease/pull/570) ### `Fixed` From d2e1aa71850817c080930966bf7556b0f91b787d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 22 Jun 2024 10:35:39 +0200 Subject: [PATCH 025/113] wes updates --- .../local/call_structural_variants.nf | 35 ++++++---- workflows/raredisease.nf | 65 +++++++++---------- 2 files changed, 55 insertions(+), 45 deletions(-) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 76f40af5..f85a1750 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -38,10 +38,17 @@ workflow CALL_STRUCTURAL_VARIANTS { .collect{it[1]} .set{ manta_vcf } - CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) - .vcf - .collect{it[1]} - .set { tiddit_vcf } + if (params.analysis_type.equals("wgs")) { + CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) + .vcf + .collect{it[1]} + .set { tiddit_vcf } + + CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) + .vcf + .collect{it[1]} + .set { cnvnator_vcf } + } if (!params.skip_germlinecnvcaller) { CALL_SV_GERMLINECNVCALLER (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model) @@ -52,11 +59,6 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) } - CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) - .vcf - .collect{it[1]} - .set { cnvnator_vcf } - if 
(params.analysis_type.equals("wgs") || params.run_mt_for_wes) { CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) ch_versions = ch_versions.mix(CALL_SV_MT.out.versions) @@ -64,16 +66,25 @@ workflow CALL_STRUCTURAL_VARIANTS { //merge if (params.skip_germlinecnvcaller) { + if (params.analysis_type.equals("wgs")) { + tiddit_vcf + .combine(manta_vcf) + .combine(cnvnator_vcf) + .toList() + .set { vcf_list } + } else { + vcf_list = manta_vcf + } + } else if (params.analysis_type.equals("wgs")){ tiddit_vcf .combine(manta_vcf) + .combine(gcnvcaller_vcf) .combine(cnvnator_vcf) .toList() .set { vcf_list } } else { - tiddit_vcf - .combine(manta_vcf) + manta_vcf .combine(gcnvcaller_vcf) - .combine(cnvnator_vcf) .toList() .set { vcf_list } } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index be4bda04..8bce9e54 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -439,17 +439,15 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_repeat_analysis) { - if ( params.analysis_type.equals("wgs") ) { - CALL_REPEAT_EXPANSIONS ( - ch_mapped.genome_bam_bai, - ch_variant_catalog, - ch_case_info, - ch_genome_fasta, - ch_genome_fai - ) - ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) - } + if (!params.skip_repeat_analysis && params.analysis_type.equals("wgs") ) { + CALL_REPEAT_EXPANSIONS ( + ch_mapped.genome_bam_bai, + ch_variant_catalog, + ch_case_info, + ch_genome_fasta, + ch_genome_fai + ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) } /* @@ -643,7 +641,7 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_me_calling || params.analysis_type.equals("wes")) { + if (!params.skip_me_calling && params.analysis_type.equals("wgs")) { CALL_MOBILE_ELEMENTS( ch_mapped.genome_bam_bai, ch_genome_fasta, @@ -688,28 +686,29 @@ workflow RAREDISEASE { 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output - .collect{it} - .toList() - .set { ch_bam_list } - - RENAME_BAI_FOR_SMNCALLER(ch_mapped.genome_marked_bai, "bam.bai").output - .collect{it} - .toList() - .set { ch_bai_list } + if ( params.analysis_type.equals("wgs") ) { + RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output + .collect{it} + .toList() + .set { ch_bam_list } - ch_case_info - .combine(ch_bam_list) - .combine(ch_bai_list) - .set { ch_bams_bais } + RENAME_BAI_FOR_SMNCALLER(ch_mapped.genome_marked_bai, "bam.bai").output + .collect{it} + .toList() + .set { ch_bai_list } - SMNCOPYNUMBERCALLER ( - ch_bams_bais - ) - ch_versions = ch_versions.mix(RENAME_BAM_FOR_SMNCALLER.out.versions) - ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions) - ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) + ch_case_info + .combine(ch_bam_list) + .combine(ch_bai_list) + .set { ch_bams_bais } + SMNCOPYNUMBERCALLER ( + ch_bams_bais + ) + ch_versions = ch_versions.mix(RENAME_BAM_FOR_SMNCALLER.out.versions) + ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions) + ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PEDDY @@ -728,7 +727,7 @@ workflow RAREDISEASE { Generate CGH files from sequencing data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if ( !params.skip_vcf2cytosure && params.analysis_type != "wes" ) { + if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") ) { GENERATE_CYTOSURE_FILES ( ch_sv_annotate.vcf_ann, ch_sv_annotate.tbi, @@ -744,7 +743,7 @@ workflow RAREDISEASE { GENS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if ( !params.skip_gens && params.analysis_type != "wes" ) { + 
if ( !params.skip_gens && params.analysis_type.equals("wgs") ) { GENS ( ch_mapped.genome_bam_bai, CALL_SNV.out.genome_gvcf, From 4def4823f28cd1e1f98d5a25f48995e1e109d214 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:24:12 +0200 Subject: [PATCH 026/113] review suggestions --- docs/usage.md | 46 +++++++++++++++++++++++--------------------- nextflow_schema.json | 2 +- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 996d0e6a..a742d128 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -215,14 +215,16 @@ The mandatory and optional parameters for each category are tabulated below. ##### 6. 
Copy number variant calling -| Mandatory | Optional | -| ------------------------------ | ------------------------------- | -| ploidy_model<sup>1</sup> | readcount_intervals<sup>3</sup> | -| gcnvcaller_model<sup>1,2</sup> | | +| Mandatory | Optional | +| --------------------------------- | -------- | +| ploidy_model<sup>1,4</sup> | | +| gcnvcaller_model<sup>1,2,4</sup> | | +| readcount_intervals<sup>3,4</sup> | | <sup>1</sup> Output from steps 3 & 4 of GATK's CNV calling pipeline run in cohort mode as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
    <sup>2</sup> Sample file can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gcnvmodels.tsv) (Note the header 'models' in the sample file).
    <sup>3</sup> Output from step 1 of GATK's CNV calling pipeline as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
    +4 All these files can be generated using the germlinecnvcaller tool option in nf-core/createpanelrefs.
    ##### 7. SNV annotation & Ranking diff --git a/nextflow_schema.json b/nextflow_schema.json index 8f1b9df8..edd444a1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -281,7 +281,7 @@ "fa_icon": "fas fa-file", "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", "format": "file-path", - "help_text": "Generated by GATK4 preprocessintervals." + "help_text": "Generated by GATK4 preprocessintervals It needs to be the same as the intervals used to generate the ploidy and cnv models." }, "reduced_penetrance": { "type": "string", From 1ffcaa12e7a4b0330866460f17256936a0972727 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:27:51 +0200 Subject: [PATCH 027/113] review suggestions --- docs/usage.md | 46 +++++++++++++++++++++++--------------------- nextflow_schema.json | 2 +- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 996d0e6a..a742d128 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. 
Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -215,14 +215,16 @@ The mandatory and optional parameters for each category are tabulated below. ##### 6. 
Copy number variant calling -| Mandatory | Optional | -| ------------------------------ | ------------------------------- | -| ploidy_model1 | readcount_intervals3 | -| gcnvcaller_model1,2 | | +| Mandatory | Optional | +| --------------------------------- | -------- | +| ploidy_model1,4 | | +| gcnvcaller_model1,2,4 | | +| readcount_intervals3,4 | | 1 Output from steps 3 & 4 of GATK's CNV calling pipeline run in cohort mode as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
    2 Sample file can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gcnvmodels.tsv) (Note the header 'models' in the sample file).
    3 Output from step 1 of GATK's CNV calling pipeline as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
    +4 All these files can be generated using the germlinecnvcaller tool option in nf-core/createpanelrefs.
    ##### 7. SNV annotation & Ranking diff --git a/nextflow_schema.json b/nextflow_schema.json index 7004ba9e..41cea99c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -281,7 +281,7 @@ "fa_icon": "fas fa-file", "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", "format": "file-path", - "help_text": "Generated by GATK4 preprocessintervals." + "help_text": "Generated by GATK4 preprocessintervals. It needs to be the same as the intervals used to generate the ploidy and cnv models." }, "reduced_penetrance": { "type": "string", From ca4cdad1422ec206802b43d0c0a5ed331df4c44c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:37:25 +0200 Subject: [PATCH 028/113] prettier --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a742d128..66d0b357 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. 
Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From db743049cb5b5199c9ee355bbbed2fc4e0fa1bbe Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 11:10:35 +0200 Subject: [PATCH 029/113] update changelog --- CHANGELOG.md | 3 +++ workflows/raredisease.nf | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b30c3e33..755501b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) - Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) ### `Changed` @@ -24,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ------------- | --------------------- | | | mbuffer_mem | | | samtools_sort_threads | +| | skip_repeat_analysis | +| | skip_snv_calling | ## 2.1.0 - Obelix [2024-05-29] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index fd2d759b..ca0b2514 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -23,7 +23,6 @@ def mandatoryParams = [ "intervals_wgs", "intervals_y", "platform", - "variant_catalog", "variant_caller" ] def missingParamsCount = 0 @@ -32,6 +31,14 @@ if (params.run_rtgvcfeval) { mandatoryParams += ["rtg_truthvcfs"] } 
+if (!params.skip_repeat_analysis) { + mandatoryParams += ["variant_catalog"] +} + +if (!params.skip_snv_calling) { + mandatoryParams += ["genome"] +} + if (!params.skip_snv_annotation) { mandatoryParams += ["genome", "vcfanno_resources", "vcfanno_toml", "vep_cache", "vep_cache_version", "gnomad_af", "score_config_snv", "variant_consequences_snv"] From 07ff9d326524cce122a96a5c1e677d03768472f1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:02:49 +0200 Subject: [PATCH 030/113] review suggestions --- workflows/raredisease.nf | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ca0b2514..9b8e132e 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -422,19 +422,18 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_repeat_analysis) { - if ( params.analysis_type.equals("wgs") ) { - CALL_REPEAT_EXPANSIONS ( - ch_mapped.genome_bam_bai, - ch_variant_catalog, - ch_case_info, - ch_genome_fasta, - ch_genome_fai - ) - ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) - } + if (!params.skip_repeat_analysis && params.analysis_type.equals("wgs") ) { + CALL_REPEAT_EXPANSIONS ( + ch_mapped.genome_bam_bai, + ch_variant_catalog, + ch_case_info, + ch_genome_fasta, + ch_genome_fai + ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SNVs From 90fad322ea3eaacfe179d04962c2bc6e0d9df172 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:29:53 +0200 Subject: [PATCH 031/113] fix logic --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/workflows/raredisease.nf b/workflows/raredisease.nf index 9d37808b..ab2883d5 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -649,7 +649,7 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_me_calling || params.analysis_type.equals("wes")) { + if (!params.skip_me_calling && params.analysis_type.equals("wgs")) { CALL_MOBILE_ELEMENTS( ch_mapped.genome_bam_bai, ch_genome_fasta, From f1ab55d2d33ffc4e1bce24a4c74a83d2b5fcc28d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:31:32 +0200 Subject: [PATCH 032/113] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 755501b6..e0742170 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572) - Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) - Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) @@ -27,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | samtools_sort_threads | | | skip_repeat_analysis | | | skip_snv_calling | +| | skip_sv_calling | ## 2.1.0 - Obelix [2024-05-29] From 31e8f2edf47f7f087acb53ce45da6fe14438d4d9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:54:14 +0200 Subject: [PATCH 033/113] fix error --- workflows/raredisease.nf | 7 ++++--- 1 file changed, 4 insertions(+), 
3 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ab2883d5..691d65b7 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -324,6 +324,7 @@ workflow RAREDISEASE { ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { ch_svcaller_priority = Channel.value(["manta"]) + } } else { if (params.analysis_type.equals("wgs")) { ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) @@ -604,9 +605,9 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) - // - // ANNOTATE STRUCTURAL VARIANTS - // + // + // ANNOTATE STRUCTURAL VARIANTS + // if (!params.skip_sv_annotation) { ANNOTATE_STRUCTURAL_VARIANTS ( CALL_STRUCTURAL_VARIANTS.out.vcf, From 66f4e486ac15413b31e95bc7508ea09fafdc7e21 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:28:39 +0200 Subject: [PATCH 034/113] update call sv workflow --- subworkflows/local/call_structural_variants.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index f85a1750..e462e9d9 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -73,7 +73,9 @@ workflow CALL_STRUCTURAL_VARIANTS { .toList() .set { vcf_list } } else { - vcf_list = manta_vcf + manta_vcf + .toList() + .set { vcf_list } } } else if (params.analysis_type.equals("wgs")){ tiddit_vcf From 8e13411e2c89f4ac688e527ebb2e4f45747055c6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:32:46 +0200 Subject: [PATCH 035/113] fix error --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a742d128..66d0b357 100644 --- 
a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. 
SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From 0ccf23901676065e78f93d95945ee21a0dcfc405 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:43:09 +0200 Subject: [PATCH 036/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0742170..196c5852 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) +- Turn off CNVnator, TIDDIT, SMNCopyNumberCaller, Gens, and Vcf2cytosure for targeted analysis [#573](https://github.com/nf-core/raredisease/pull/573) ### `Fixed` From 4d74bafd3ae83ce5a9849876c657a7f390bb21b5 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:39:19 +0200 Subject: [PATCH 037/113] remove skip_eklipse --- .github/workflows/download_pipeline.yml | 8 -------- CHANGELOG.md | 2 ++ conf/test.config | 1 - conf/test_one_sample.config | 1 - nextflow.config | 1 - nextflow_schema.json | 5 ----- .../local/utils_nfcore_raredisease_pipeline/main.nf | 4 ++-- subworkflows/local/variant_calling/call_sv_MT.nf | 12 +++++------- 8 files changed, 9 insertions(+), 25 deletions(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index f24cc9ff..42badad9 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -69,11 +69,3 @@ jobs: - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Run the downloaded pipeline (stub) - id: stub_run_pipeline - continue-on-error: true - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/CHANGELOG.md b/CHANGELOG.md index 196c5852..7a4687e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Remove several skip parameters that had been included in the pipeline to avoid failed CI tests [#574](https://github.com/nf-core/raredisease/pull/574) - `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) - Turn off CNVnator, TIDDIT, SMNCopyNumberCaller, Gens, and Vcf2cytosure for targeted analysis [#573](https://github.com/nf-core/raredisease/pull/573) @@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | skip_repeat_analysis | | | skip_snv_calling | | | skip_sv_calling | +| skip_eklipse | | ## 2.1.0 - Obelix [2024-05-29] diff --git a/conf/test.config b/conf/test.config index 85a2e404..7250fef2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,7 +24,6 @@ params { mito_name = 'MT' // analysis params - skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index 4f641aac..c2ec91f8 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -24,7 +24,6 @@ params { mito_name = 'MT' // analysis params - skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? 
false : true // skip tool on Github CI diff --git a/nextflow.config b/nextflow.config index 9ab21daa..4a155608 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,7 +27,6 @@ params { run_mt_for_wes = false run_rtgvcfeval = false save_mapped_as_cram = false - skip_eklipse = false skip_fastp = false skip_fastqc = false skip_gens = true diff --git a/nextflow_schema.json b/nextflow_schema.json index b0ef5f13..3c2213e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -533,11 +533,6 @@ "description": "Specifies whether or not to skip CNV calling using GATK's GermlineCNVCaller", "fa_icon": "fas fa-toggle-on" }, - "skip_eklipse": { - "type": "boolean", - "description": "Specifies whether or not to skip eKLIPse.", - "fa_icon": "fas fa-toggle-on" - }, "skip_peddy": { "type": "boolean", "description": "Specifies whether or not to skip peddy.", diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index 54ee0a08..0374459f 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -229,7 +229,7 @@ def toolCitationText() { "TIDDIT (Eisfeldt et al., 2017),", "Manta (Chen et al., 2016),", "GLnexus (Yun et al., 2021),", - params.skip_eklipse ? "" : "eKLIPse (Goudenge et al., 2019),", + "eKLIPse (Goudenge et al., 2019),", ] repeat_call_text = [ "ExpansionHunter (Dolzhenko et al., 2019),", @@ -338,7 +338,7 @@ def toolBibliographyText() { "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", "
  • Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710
  • ", "
  • Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081
  • ", - params.skip_eklipse ? "" : "
  • Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8
  • ", + "
  • Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8
  • ", ] repeat_call_text = [ "
  • Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431
  • ", diff --git a/subworkflows/local/variant_calling/call_sv_MT.nf b/subworkflows/local/variant_calling/call_sv_MT.nf index 7ce39b42..74d82d8e 100644 --- a/subworkflows/local/variant_calling/call_sv_MT.nf +++ b/subworkflows/local/variant_calling/call_sv_MT.nf @@ -16,13 +16,11 @@ workflow CALL_SV_MT { ch_eklipse_genes = Channel.empty() ch_eklipse_circos = Channel.empty() - if (!params.skip_eklipse){ - EKLIPSE(ch_bam_bai,[]) - ch_eklipse_del = EKLIPSE.out.deletions - ch_eklipse_genes = EKLIPSE.out.genes - ch_eklipse_circos = EKLIPSE.out.circos - ch_versions = ch_versions.mix(EKLIPSE.out.versions.first()) - } + EKLIPSE(ch_bam_bai,[]) + ch_eklipse_del = EKLIPSE.out.deletions + ch_eklipse_genes = EKLIPSE.out.genes + ch_eklipse_circos = EKLIPSE.out.circos + ch_versions = ch_versions.mix(EKLIPSE.out.versions.first()) MT_DELETION(ch_bam_bai, ch_fasta) From a58d5ef3e50b8bb363ff3147786fbba374129786 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:51:27 +0200 Subject: [PATCH 038/113] remove skip_haplocheck --- CHANGELOG.md | 17 +++++++++-------- conf/test.config | 1 - conf/test_one_sample.config | 1 - nextflow.config | 1 - nextflow_schema.json | 5 ----- .../utils_nfcore_raredisease_pipeline/main.nf | 4 ++-- .../local/variant_calling/call_snv_MT.nf | 10 ++++------ 7 files changed, 15 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a4687e8..88ba11c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,14 +24,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters -| Old parameter | New parameter | -| ------------- | --------------------- | -| | mbuffer_mem | -| | samtools_sort_threads | -| | skip_repeat_analysis | -| | skip_snv_calling | -| | skip_sv_calling | -| skip_eklipse | | +| Old parameter | New parameter | +| --------------- | --------------------- | +| | mbuffer_mem | +| | samtools_sort_threads | +| | 
skip_repeat_analysis | +| | skip_snv_calling | +| | skip_sv_calling | +| skip_eklipse | | +| skip_haplocheck | | ## 2.1.0 - Obelix [2024-05-29] diff --git a/conf/test.config b/conf/test.config index 7250fef2..48848f04 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,7 +26,6 @@ params { // analysis params skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true - skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index c2ec91f8..66a9adc6 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -26,7 +26,6 @@ params { // analysis params skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true - skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? 
false : true // skip subsample on Github CI diff --git a/nextflow.config b/nextflow.config index 4a155608..74d622ac 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,7 +31,6 @@ params { skip_fastqc = false skip_gens = true skip_germlinecnvcaller = false - skip_haplocheck = false skip_peddy = false skip_me_calling = false skip_me_annotation = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c2213e7..7e2ff0f4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -518,11 +518,6 @@ "description": "Specifies whether or not to skip trimming with fastp.", "fa_icon": "fas fa-toggle-on" }, - "skip_haplocheck": { - "type": "boolean", - "description": "Specifies whether or not to skip haplocheck.", - "fa_icon": "fas fa-toggle-on" - }, "skip_gens": { "type": "boolean", "description": "Specifies whether or not to skip gens preprocessing subworkflow.", diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index 0374459f..6bd3aaa6 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -224,7 +224,7 @@ def toolCitationText() { variant_call_text = [ params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "", params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "", - params.skip_haplocheck ? "" : "Haplocheck (Weissensteiner et al., 2021),", + "Haplocheck (Weissensteiner et al., 2021),", "CNVnator (Abyzov et al., 2011),", "TIDDIT (Eisfeldt et al., 2017),", "Manta (Chen et al., 2016),", @@ -333,7 +333,7 @@ def toolBibliographyText() { variant_call_text = [ params.variant_caller.equals("deepvariant") ? "
  • Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235
  • " : "", params.variant_caller.equals("sentieon") ? "
  • Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556
  • " : "", - params.skip_haplocheck ? "" : "
  • Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119
  • ", + "
  • Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119
  • ", "
  • Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110
  • ", "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", "
  • Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710
  • ", diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf index b0a2fbae..17135d33 100644 --- a/subworkflows/local/variant_calling/call_snv_MT.nf +++ b/subworkflows/local/variant_calling/call_snv_MT.nf @@ -23,12 +23,10 @@ workflow CALL_SNV_MT { GATK4_MUTECT2_MT (ch_bam_bai_int, ch_fasta, ch_fai, ch_dict, [], [], [],[]) - if (!params.skip_haplocheck) { - HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf).set { ch_haplocheck } - ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) - ch_haplocheck_txt = HAPLOCHECK_MT.out.txt - ch_haplocheck_html = HAPLOCHECK_MT.out.html - } + HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf).set { ch_haplocheck } + ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) + ch_haplocheck_txt = HAPLOCHECK_MT.out.txt + ch_haplocheck_html = HAPLOCHECK_MT.out.html // Filter Mutect2 calls ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) From cd583e4806dec50fad012ffe0bb3678f55210ded Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:01:00 +0200 Subject: [PATCH 039/113] remove skip_qualimap --- CHANGELOG.md | 1 + conf/modules/qc_bam.config | 6 ++---- conf/test.config | 1 - conf/test_one_sample.config | 1 - nextflow.config | 1 - nextflow_schema.json | 5 ----- subworkflows/local/qc_bam.nf | 6 ++---- 7 files changed, 5 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88ba11c2..f3416a3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | skip_sv_calling | | skip_eklipse | | | skip_haplocheck | | +| skip_qualimap | | ## 2.1.0 - Obelix [2024-05-29] diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config index 29cd3900..a2a17364 100644 --- a/conf/modules/qc_bam.config +++ b/conf/modules/qc_bam.config @@ -35,10 
+35,8 @@ process { ext.prefix = { "${meta.id}_hsmetrics" } } - if (!params.skip_qualimap) { - withName: '.*QC_BAM:QUALIMAP_BAMQC' { - ext.prefix = { "${meta.id}_qualimap" } - } + withName: '.*QC_BAM:QUALIMAP_BAMQC' { + ext.prefix = { "${meta.id}_qualimap" } } withName: '.*QC_BAM:TIDDIT_COV' { diff --git a/conf/test.config b/conf/test.config index 48848f04..29ac1bd9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,7 +26,6 @@ params { // analysis params skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true - skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI skip_peddy = true diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index 66a9adc6..ef4a1503 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -26,7 +26,6 @@ params { // analysis params skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true - skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? 
false : true // skip subsample on Github CI skip_peddy = true diff --git a/nextflow.config b/nextflow.config index 74d622ac..f02888a1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,7 +35,6 @@ params { skip_me_calling = false skip_me_annotation = false skip_mt_annotation = false - skip_qualimap = false skip_repeat_analysis = false skip_snv_annotation = false skip_snv_calling = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 7e2ff0f4..c8cfe3f7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -533,11 +533,6 @@ "description": "Specifies whether or not to skip peddy.", "fa_icon": "fas fa-toggle-on" }, - "skip_qualimap": { - "type": "boolean", - "description": "Specifies whether or not to skip Qualimap.", - "fa_icon": "fas fa-toggle-on" - }, "skip_me_calling": { "type": "boolean", "description": "Specifies whether or not to skip calling mobile elements, and the subsequent annotation step.", diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 26c4fa75..2b08eebb 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -45,10 +45,8 @@ workflow QC_BAM { PICARD_COLLECTHSMETRICS (ch_hsmetrics_in, ch_genome_fasta, ch_genome_fai, [[],[]]) - if (!params.skip_qualimap) { - ch_qualimap = QUALIMAP_BAMQC (ch_bam, []).results - ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) - } + ch_qualimap = QUALIMAP_BAMQC (ch_bam, []).results + ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) TIDDIT_COV (ch_bam, [[],[]]) // 2nd pos. arg is req. 
only for cram input From 38edf8a5dc23dc89fbe67375e418d559e4c0658d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:03:56 +0200 Subject: [PATCH 040/113] remove skip_fastqc --- CHANGELOG.md | 1 + conf/test.config | 1 - conf/test_one_sample.config | 1 - nextflow.config | 1 - nextflow_schema.json | 5 ----- .../local/utils_nfcore_raredisease_pipeline/main.nf | 4 ++-- workflows/raredisease.nf | 10 +++------- 7 files changed, 6 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3416a3c..81d98c46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | skip_snv_calling | | | skip_sv_calling | | skip_eklipse | | +| skip_fastqc | | | skip_haplocheck | | | skip_qualimap | | diff --git a/conf/test.config b/conf/test.config index 29ac1bd9..439d4875 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,7 +24,6 @@ params { mito_name = 'MT' // analysis params - skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index ef4a1503..fc9b24cc 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -24,7 +24,6 @@ params { mito_name = 'MT' // analysis params - skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_germlinecnvcaller = true skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? 
false : true // skip subsample on Github CI diff --git a/nextflow.config b/nextflow.config index f02888a1..4e3ed9a0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,6 @@ params { run_rtgvcfeval = false save_mapped_as_cram = false skip_fastp = false - skip_fastqc = false skip_gens = true skip_germlinecnvcaller = false skip_peddy = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c8cfe3f7..222f123a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -508,11 +508,6 @@ "description": "Specifies whether to generate and publish alignment files as cram instead of bam", "fa_icon": "fas fa-toggle-on" }, - "skip_fastqc": { - "type": "boolean", - "description": "Specifies whether or not to skip FASTQC.", - "fa_icon": "fas fa-toggle-on" - }, "skip_fastp": { "type": "boolean", "description": "Specifies whether or not to skip trimming with fastp.", diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index 6bd3aaa6..c8dd5528 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -278,7 +278,7 @@ def toolCitationText() { "RetroSeq (Keane et al., 2013),", ] preprocessing_text = [ - params.skip_fastqc ? "" : "FastQC (Andrews 2010),", + "FastQC (Andrews 2010),", params.skip_fastp ? "" : "Fastp (Chen, 2023),", ] other_citation_text = [ @@ -389,7 +389,7 @@ def toolBibliographyText() { "
  • Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697
  • ", ] preprocessing_text = [ - params.skip_fastqc ? "" : "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  • ", + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  • ", params.skip_fastp ? "" : "
  • Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107
  • ", ] other_citation_text = [ diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 8dff53ab..01212cc2 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -370,10 +370,8 @@ workflow RAREDISEASE { // // Input QC // - if (!params.skip_fastqc) { - FASTQC (ch_samplesheet) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } + FASTQC (ch_samplesheet) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // // Create chromosome bed and intervals for splitting and gathering operations @@ -830,9 +828,7 @@ workflow RAREDISEASE { ) ) - if (!params.skip_fastqc) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - } + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.multiple_metrics.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.hs_metrics.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.qualimap_results.map{it[1]}.collect().ifEmpty([])) From 391f8c9934fa5edc9425157d0d1c6b77709a1580 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:07:34 +0200 Subject: [PATCH 041/113] add skip_smncopynumbercaller --- CHANGELOG.md | 26 ++++++++++++++------------ nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/raredisease.nf | 5 ++++- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81d98c46..94d73091 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module[#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572) - Two 
new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) - Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) ### `Changed` -- Remove several skip parameters that had been included in the pipeline to avoid failed CI tests [#574](https://github.com/nf-core/raredisease/pull/574) +- Remove several skip parameters that had been included in the pipeline to avoid failed CI tests (see parameters table below) [#574](https://github.com/nf-core/raredisease/pull/574) - `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. [#570](https://github.com/nf-core/raredisease/pull/570) - Turn off CNVnator, TIDDIT, SMNCopyNumberCaller, Gens, and Vcf2cytosure for targeted analysis [#573](https://github.com/nf-core/raredisease/pull/573) @@ -24,17 +25,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters -| Old parameter | New parameter | -| --------------- | --------------------- | -| | mbuffer_mem | -| | samtools_sort_threads | -| | skip_repeat_analysis | -| | skip_snv_calling | -| | skip_sv_calling | -| skip_eklipse | | -| skip_fastqc | | -| skip_haplocheck | | -| skip_qualimap | | +| Old parameter | New parameter | +| --------------- | ------------------------ | +| | mbuffer_mem | +| | samtools_sort_threads | +| | skip_repeat_analysis | +| | skip_snv_calling | +| | skip_sv_calling | +| skip_eklipse | | +| skip_fastqc | | +| skip_haplocheck | | +| skip_qualimap | | +| | skip_smncopynumbercaller | ## 2.1.0 - Obelix [2024-05-29] diff --git a/nextflow.config b/nextflow.config index 4e3ed9a0..1ec0f584 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { skip_me_annotation = false skip_mt_annotation = false skip_repeat_analysis = false + 
skip_smncopynumbercaller = false skip_snv_annotation = false skip_snv_calling = false skip_sv_annotation = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 222f123a..46e4bdbf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -553,6 +553,11 @@ "description": "Specifies whether or not to skip calling and annotation of repeat expansions.", "fa_icon": "fas fa-toggle-on" }, + "skip_smncopynumbercaller": { + "type": "boolean", + "description": "Specifies whether or not to skip smncopynumbercaller.", + "fa_icon": "fas fa-toggle-on" + }, "skip_snv_annotation": { "type": "boolean", "description": "Specifies whether or not to skip annotate SNV subworkflow.", diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 01212cc2..eb349c78 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -90,6 +90,9 @@ if (!params.skip_gens) { mandatoryParams += ["gens_gnomad_pos", "gens_interval_list", "gens_pon_female", "gens_pon_male"] } +if (!params.skip_smncopynumbercaller) { + mandatoryParams += ["genome"] +} for (param in mandatoryParams.unique()) { if (params[param] == null) { println("params." 
+ param + " not set.") @@ -693,7 +696,7 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if ( params.analysis_type.equals("wgs") ) { + if ( params.analysis_type.equals("wgs") && !params.skip_smncopynumbercaller ) { RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output .collect{it} .toList() From 937250d5f4cd16b247d0eb19e18d68965c4c6023 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 25 Jun 2024 12:47:18 +0200 Subject: [PATCH 042/113] update module --- modules.json | 2 +- modules/nf-core/peddy/main.nf | 7 +- modules/nf-core/peddy/tests/main.nf.test | 35 +++++++ modules/nf-core/peddy/tests/main.nf.test.snap | 93 +++++++++++++++++++ modules/nf-core/peddy/tests/tags.yml | 2 + 5 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/peddy/tests/main.nf.test create mode 100644 modules/nf-core/peddy/tests/main.nf.test.snap create mode 100644 modules/nf-core/peddy/tests/tags.yml diff --git a/modules.json b/modules.json index e400f22d..96664577 100644 --- a/modules.json +++ b/modules.json @@ -304,7 +304,7 @@ }, "peddy": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "bb8c1e039f2c1d603e8a93665d0386d5c6ac8e5e", "installed_by": ["modules"] }, "picard/addorreplacereadgroups": { diff --git a/modules/nf-core/peddy/main.nf b/modules/nf-core/peddy/main.nf index b6be28c6..0e533ec3 100644 --- a/modules/nf-core/peddy/main.nf +++ b/modules/nf-core/peddy/main.nf @@ -35,7 +35,7 @@ process PEDDY { cat <<-END_VERSIONS > versions.yml "${task.process}": - peddy: \$( peddy --version 2>&1 | sed 's/peddy, version //' ) + peddy: \$( peddy --version 2>&1 | tail -1 | sed 's/peddy, version //' ) END_VERSIONS """ @@ -49,6 +49,9 @@ process PEDDY { touch ${prefix}.peddy.ped touch ${prefix}.html - touch versions.yml + cat <<-END_VERSIONS > versions.yml + "${task.process}": + peddy: \$( 
peddy --version 2>&1 | tail -1 | sed 's/peddy, version //' ) + END_VERSIONS """ } diff --git a/modules/nf-core/peddy/tests/main.nf.test b/modules/nf-core/peddy/tests/main.nf.test new file mode 100644 index 00000000..892da6cf --- /dev/null +++ b/modules/nf-core/peddy/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process PEDDY" + script "modules/nf-core/peddy/main.nf" + process "PEDDY" + + tag "modules" + tag "modules_nfcore" + tag "peddy" + + test("test - peddy - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['genome']['justhusky_minimal_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['justhusky_minimal_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['justhusky_ped'], checkIfExists: true) + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/peddy/tests/main.nf.test.snap b/modules/nf-core/peddy/tests/main.nf.test.snap new file mode 100644 index 00000000..5de593f1 --- /dev/null +++ b/modules/nf-core/peddy/tests/main.nf.test.snap @@ -0,0 +1,93 @@ +{ + "test - peddy - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.vs.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.het_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ped_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sex_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.peddy.ped:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,d3587e67aded68bcf24c47542efe012f" + ], + "csv": [ + [ + { + "id": "test", + "single_end": 
false + }, + [ + "test.het_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ped_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sex_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.vs.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "ped": [ + [ + { + "id": "test", + "single_end": false + }, + "test.peddy.ped:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + + ], + "versions": [ + "versions.yml:md5,d3587e67aded68bcf24c47542efe012f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T09:28:05.418978589" + } +} \ No newline at end of file diff --git a/modules/nf-core/peddy/tests/tags.yml b/modules/nf-core/peddy/tests/tags.yml new file mode 100644 index 00000000..d8324a74 --- /dev/null +++ b/modules/nf-core/peddy/tests/tags.yml @@ -0,0 +1,2 @@ +mosdepth: + - "modules/nf-core/peddy/**" From 81010339b347febabde8f1fc29ba1c8708037c26 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:48:58 +0200 Subject: [PATCH 043/113] add skip for repeat annotation --- CHANGELOG.md | 5 ++- .../modules/annotate_repeat_expansions.config | 41 +++++++++++++++++++ conf/modules/call_repeat_expansions.config | 21 ---------- nextflow.config | 4 +- nextflow_schema.json | 9 +++- .../local/annotate_repeat_expansions.nf | 34 +++++++++++++++ subworkflows/local/call_repeat_expansions.nf | 20 +-------- workflows/raredisease.nf | 17 +++++++- 8 files changed, 106 insertions(+), 45 deletions(-) create mode 100644 conf/modules/annotate_repeat_expansions.config create mode 100644 subworkflows/local/annotate_repeat_expansions.nf diff --git a/CHANGELOG.md b/CHANGELOG.md index 94d73091..5ab10277 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module[#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572) - Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) @@ -14,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Removed `skip_repeat_analysis` added in #571 [#574](https://github.com/nf-core/raredisease/pull/574) - Remove several skip parameters that had been included in the pipeline to avoid failed CI tests (see parameters table below) [#574](https://github.com/nf-core/raredisease/pull/574) - `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) - Turn off CNVnator, TIDDIT, SMNCopyNumberCaller, Gens, and Vcf2cytosure for targeted analysis [#573](https://github.com/nf-core/raredisease/pull/573) @@ -29,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | --------------- | ------------------------ | | | mbuffer_mem | | | samtools_sort_threads | -| | skip_repeat_analysis | +| | skip_repeat_calling | | | skip_snv_calling | | | skip_sv_calling | | skip_eklipse | | @@ -37,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | skip_haplocheck | | | skip_qualimap | | | | skip_smncopynumbercaller | +| | skip_repeat_annotation | ## 2.1.0 - Obelix [2024-05-29] diff --git a/conf/modules/annotate_repeat_expansions.config b/conf/modules/annotate_repeat_expansions.config new file mode 100644 index 00000000..c1d934ea --- /dev/null +++ b/conf/modules/annotate_repeat_expansions.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Repeat expansion calling options +// + +process { + + withName: '.*ANNOTATE_REPEAT_EXPANSIONS:STRANGER' { + ext.prefix = { "${meta.id}_stranger" } + } + + withName: '.*ANNOTATE_REPEAT_EXPANSIONS:COMPRESS_STRANGER' { + ext.prefix = { "${meta.id}_repeat_expansion_stranger" } + ext.args = '--output-type z' + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ANNOTATE_REPEAT_EXPANSIONS:INDEX_STRANGER' { + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } +} diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config index a3c140a0..fd6738ae 100644 --- a/conf/modules/call_repeat_expansions.config +++ b/conf/modules/call_repeat_expansions.config @@ -58,25 +58,4 @@ process { ext.args = {"--notag"} } - withName: '.*CALL_REPEAT_EXPANSIONS:STRANGER' { - ext.prefix = { "${meta.id}_stranger" } - } - - withName: '.*CALL_REPEAT_EXPANSIONS:COMPRESS_STRANGER' { - ext.prefix = { "${meta.id}_repeat_expansion_stranger" } - ext.args = '--output-type z' - publishDir = [ - path: { "${params.outdir}/repeat_expansions" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*CALL_REPEAT_EXPANSIONS:INDEX_STRANGER' { - publishDir = [ - path: { "${params.outdir}/repeat_expansions" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - ] - } } diff --git a/nextflow.config b/nextflow.config index 1ec0f584..0c76f47e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -34,7 +34,8 @@ params { skip_me_calling = false skip_me_annotation = false skip_mt_annotation = false - skip_repeat_analysis = false + skip_repeat_annotation = false + skip_repeat_calling = false skip_smncopynumbercaller = false skip_snv_annotation = false skip_snv_calling = false @@ -316,6 +317,7 @@ includeConfig 'conf/modules/annotate_consequence_pli.config' includeConfig 'conf/modules/annotate_genome_snvs.config' includeConfig 'conf/modules/annotate_mt_snvs.config' includeConfig 'conf/modules/annotate_structural_variants.config' +includeConfig 'conf/modules/annotate_repeat_expansions.config' includeConfig 'conf/modules/call_repeat_expansions.config' includeConfig 'conf/modules/call_snv.config' includeConfig 'conf/modules/call_structural_variants.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 46e4bdbf..e8164492 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -548,9 +548,14 @@ "description": "Specifies whether or not to subsample mt alignment.", "fa_icon": "fas fa-toggle-on" }, - "skip_repeat_analysis": { + "skip_repeat_annotation": { "type": "boolean", - "description": "Specifies whether or not to skip calling and annotation of repeat expansions.", + "description": "Specifies whether or not to skip annotation of repeat expansions.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_repeat_calling": { + "type": "boolean", + "description": "Specifies whether or not to skip calling of repeat expansions.", "fa_icon": "fas fa-toggle-on" }, "skip_smncopynumbercaller": { diff --git a/subworkflows/local/annotate_repeat_expansions.nf b/subworkflows/local/annotate_repeat_expansions.nf new file mode 100644 index 00000000..5f8b8ea0 --- /dev/null +++ b/subworkflows/local/annotate_repeat_expansions.nf @@ -0,0 +1,34 @@ +// +// Annotate repeat expansions +// + +include { BCFTOOLS_VIEW as 
COMPRESS_STRANGER } from '../../modules/nf-core/bcftools/view/main' +include { STRANGER } from '../../modules/nf-core/stranger/main' +include { TABIX_TABIX as INDEX_STRANGER } from '../../modules/nf-core/tabix/tabix/main' + +workflow ANNOTATE_REPEAT_EXPANSIONS { + take: + ch_variant_catalog // channel: [mandatory] [ path(variant_catalog.json) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + + main: + ch_versions = Channel.empty() + + // Annotate, compress and index + STRANGER ( ch_vcf, ch_variant_catalog ) + COMPRESS_STRANGER ( + STRANGER.out.vcf.map{ meta, vcf -> [meta, vcf, [] ]}, + [], [], [] + ) + INDEX_STRANGER ( COMPRESS_STRANGER.out.vcf ) + + ch_vcf_idx = COMPRESS_STRANGER.out.vcf.join(INDEX_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true) + + ch_versions = ch_versions.mix(STRANGER.out.versions.first()) + ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions.first()) + ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first()) + +emit: + vcf = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/call_repeat_expansions.nf b/subworkflows/local/call_repeat_expansions.nf index 130b7a5b..c45a92ff 100644 --- a/subworkflows/local/call_repeat_expansions.nf +++ b/subworkflows/local/call_repeat_expansions.nf @@ -4,14 +4,11 @@ include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_EXP } from '../../modules/nf-core/bcftools/norm/main' include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_EXP } from '../../modules/nf-core/bcftools/reheader/main' -include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../modules/nf-core/bcftools/view/main' include { EXPANSIONHUNTER } from '../../modules/nf-core/expansionhunter/main' include { PICARD_RENAMESAMPLEINVCF as RENAMESAMPLE_EXP } from '../../modules/nf-core/picard/renamesampleinvcf/main' -include { STRANGER } from '../../modules/nf-core/stranger/main' include { SAMTOOLS_SORT } from 
'../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { SVDB_MERGE as SVDB_MERGE_REPEATS } from '../../modules/nf-core/svdb/merge/main' -include { TABIX_TABIX as INDEX_STRANGER } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_EXP_RENAME } from '../../modules/nf-core/tabix/tabix/main' workflow CALL_REPEAT_EXPANSIONS { @@ -63,29 +60,16 @@ workflow CALL_REPEAT_EXPANSIONS { SVDB_MERGE_REPEATS ( ch_svdb_merge_input, [] ) - // Annotate, compress and index - STRANGER ( SVDB_MERGE_REPEATS.out.vcf, ch_variant_catalog ) - COMPRESS_STRANGER ( - STRANGER.out.vcf.map{ meta, vcf -> [meta, vcf, [] ]}, - [], [], [] - ) - INDEX_STRANGER ( COMPRESS_STRANGER.out.vcf ) - - ch_vcf_idx = COMPRESS_STRANGER.out.vcf.join(INDEX_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true) - ch_versions = ch_versions.mix(EXPANSIONHUNTER.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_EXP.out.versions.first()) ch_versions = ch_versions.mix(RENAMESAMPLE_EXP.out.versions.first() ) ch_versions = ch_versions.mix(TABIX_EXP_RENAME.out.versions.first()) ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_EXP.out.versions.first()) ch_versions = ch_versions.mix(SVDB_MERGE_REPEATS.out.versions.first()) - ch_versions = ch_versions.mix(STRANGER.out.versions.first()) - ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions.first()) - ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) emit: - vcf = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = SVDB_MERGE_REPEATS.out.vcf // channel: [ val(meta), path(vcf) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index eb349c78..da7dd562 
100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -31,7 +31,11 @@ if (params.run_rtgvcfeval) { mandatoryParams += ["rtg_truthvcfs"] } -if (!params.skip_repeat_analysis) { +if (!params.skip_repeat_calling) { + mandatoryParams += ["variant_catalog"] +} + +if (!params.skip_repeat_annotation) { mandatoryParams += ["variant_catalog"] } @@ -141,6 +145,7 @@ include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkf include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs' include { ANNOTATE_MOBILE_ELEMENTS } from '../subworkflows/local/annotate_mobile_elements' include { ANNOTATE_MT_SNVS } from '../subworkflows/local/annotate_mt_snvs' +include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions' include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants' include { CALL_MOBILE_ELEMENTS } from '../subworkflows/local/call_mobile_elements' include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions' @@ -448,7 +453,7 @@ workflow RAREDISEASE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if (!params.skip_repeat_analysis && params.analysis_type.equals("wgs") ) { + if (!params.skip_repeat_calling && params.analysis_type.equals("wgs") ) { CALL_REPEAT_EXPANSIONS ( ch_mapped.genome_bam_bai, ch_variant_catalog, @@ -457,6 +462,14 @@ workflow RAREDISEASE { ch_genome_fai ) ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + + if (!params.skip_repeat_annotation) { + ANNOTATE_REPEAT_EXPANSIONS ( + ch_variant_catalog, + CALL_REPEAT_EXPANSIONS.out.vcf + ) + ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) + } } From 877a37b0d2518828aa689ccee0f71470911d48b3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 26 Jun 2024 16:19:06 +0200 Subject: [PATCH 044/113] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 
file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ab10277..05c3d729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576] - Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) From 65b35cf5e5335936d2d2c388d318d03796e635a1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 26 Jun 2024 22:02:17 +0200 Subject: [PATCH 045/113] update citations --- .../utils_nfcore_raredisease_pipeline/main.nf | 153 ++++++++++-------- 1 file changed, 88 insertions(+), 65 deletions(-) diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index c8dd5528..dd3e1812 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -203,10 +203,11 @@ def genomeExistsError() { def toolCitationText() { def align_text = [] - def variant_call_text = [] - def repeat_call_text = [] + def repeats_text = [] def snv_annotation_text = [] + def snv_calls_text = [] def sv_annotation_text = [] + def sv_calls_text = [] def mt_annotation_text = [] def qc_bam_text = [] def me_calls_text = [] @@ -215,55 +216,67 @@ def toolCitationText() { def other_citation_text = [] align_text = [ - params.aligner.equals("bwa") ? "BWA (Li, 2013)," :"", - params.aligner.equals("bwamem2") ? "BWA-MEM2 (Vasimuddin et al., 2019)," : "", - params.aligner.equals("bwameme") ? "BWA-MEME (Jung et al., 2022)," : "", + params.aligner.equals("bwa") ? "BWA (Li, 2013)," :"", + params.aligner.equals("bwamem2") ? 
"BWA-MEM2 (Vasimuddin et al., 2019)," : "", + params.aligner.equals("bwameme") ? "BWA-MEME (Jung et al., 2022)," : "", params.aligner.equals("sentieon") ? "Sentieon DNASeq (Kendig et al., 2019)," : "", - params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : "" + params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : "" ] - variant_call_text = [ - params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "", - params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "", - "Haplocheck (Weissensteiner et al., 2021),", - "CNVnator (Abyzov et al., 2011),", - "TIDDIT (Eisfeldt et al., 2017),", - "Manta (Chen et al., 2016),", - "GLnexus (Yun et al., 2021),", - "eKLIPse (Goudenge et al., 2019),", - ] - repeat_call_text = [ - "ExpansionHunter (Dolzhenko et al., 2019),", - "stranger (Nilsson & Magnusson, 2021)," + repeats_text = [ + (!params.skip_repeat_calling && params.analysis_type.equals("wgs")) ? "ExpansionHunter (Dolzhenko et al., 2019)," : "", + (!params.skip_repeat_annotation && params.analysis_type.equals("wgs")) ? "stranger (Nilsson & Magnusson, 2021)," : "" ] if (!params.skip_snv_annotation) { snv_annotation_text = [ "CADD (Rentzsch et al., 2019, 2021),", "Vcfanno (Pedersen et al., 2016),", "VEP (McLaren et al., 2016),", - "Genmod (Magnusson et al., 2018),", + "Genmod (Magnusson et al., 2018)," + ] + } + if (!params.skip_snv_calling) { + snv_calls_text = [ + params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "", + params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "", + params.run_mt_for_wes ? 
"Haplocheck (Weissensteiner et al., 2021)," : "", + "GLnexus (Yun et al., 2021)," ] } if (!params.skip_sv_annotation) { sv_annotation_text = [ "SVDB (Eisfeldt et al., 2017),", "VEP (McLaren et al., 2016),", - "Genmod (Magnusson et al., 2018),", + "Genmod (Magnusson et al., 2018)," + ] + } + if (!params.skip_sv_calling) { + sv_calls_text = [ + params.analysis_type.equals("wgs") ? "CNVnator (Abyzov et al., 2011)," : "", + params.analysis_type.equals("wgs") ? "TIDDIT (Eisfeldt et al., 2017)," : "", + "Manta (Chen et al., 2016),", + params.analysis_type.equals("wgs") ? "eKLIPse (Goudenge et al., 2019)," : "" ] } - if (!params.skip_mt_annotation) { + if (!params.skip_mt_annotation && (params.analysis_type.equals("wgs") || params.run_mt_for_wes)) { mt_annotation_text = [ "CADD (Rentzsch et al., 2019, 2021),", "VEP (McLaren et al., 2016),", "Vcfanno (Pedersen et al., 2016),", "Hmtnote (Preste et al., 2019),", "HaploGrep2 (Weissensteiner et al., 2016),", - "Genmod (Magnusson et al., 2018),", + "Genmod (Magnusson et al., 2018)," ] } - if (!params.skip_me_annotation) { + if (!params.skip_me_annotation && params.analysis_type.equals("wgs")) { me_annotation_text = [ "VEP (McLaren et al., 2016),", + "SVDB (Eisfeldt et al., 2017)," + ] + } + if (!params.skip_me_calling && params.analysis_type.equals("wgs")) { + me_calls_text = [ "SVDB (Eisfeldt et al., 2017),", + "RetroSeq (Keane et al., 2013)," ] } qc_bam_text = [ @@ -271,33 +284,30 @@ def toolCitationText() { "Qualimap (Okonechnikov et al., 2016),", "TIDDIT (Eisfeldt et al., 2017),", "UCSC Bigwig and Bigbed (Kent et al., 2010),", - "Mosdepth (Pedersen & Quinlan, 2018),", - ] - me_calls_text = [ - "SVDB (Eisfeldt et al., 2017),", - "RetroSeq (Keane et al., 2013),", + "Mosdepth (Pedersen & Quinlan, 2018)," ] preprocessing_text = [ "FastQC (Andrews 2010),", - params.skip_fastp ? "" : "Fastp (Chen, 2023),", + params.skip_fastp ? 
"" : "Fastp (Chen, 2023)," ] other_citation_text = [ "BCFtools (Danecek et al., 2021),", "GATK (McKenna et al., 2010),", "MultiQC (Ewels et al. 2016),", - params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),", + params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),", params.run_rtgvcfeval ? "RTG Tools (Cleary et al., 2015)," : "", "SAMtools (Li et al., 2009),", - "SMNCopyNumberCaller (Chen et al., 2020),", + (!params.skip_smncopynumbercaller && params.analysis_type.equals("wgs")) ? "SMNCopyNumberCaller (Chen et al., 2020)," : "", "Tabix (Li, 2011)", "." ] def concat_text = align_text + - variant_call_text + - repeat_call_text + + repeats_text + snv_annotation_text + + snv_calls_text + sv_annotation_text + + sv_calls_text + mt_annotation_text + qc_bam_text + me_calls_text + @@ -312,10 +322,11 @@ def toolCitationText() { def toolBibliographyText() { def align_text = [] - def variant_call_text = [] - def repeat_call_text = [] + def repeats_text = [] def snv_annotation_text = [] + def snv_calls_text = [] def sv_annotation_text = [] + def sv_calls_text = [] def mt_annotation_text = [] def qc_bam_text = [] def me_calls_text = [] @@ -330,19 +341,9 @@ def toolBibliographyText() { params.aligner.equals("sentieon") ? "
  • Kendig, K. I., Baheti, S., Bockol, M. A., Drucker, T. M., Hart, S. N., Heldenbrand, J. R., Hernaez, M., Hudson, M. E., Kalmbach, M. T., Klee, E. W., Mattson, N. R., Ross, C. A., Taschuk, M., Wieben, E. D., Wiepert, M., Wildman, D. E., & Mainzer, L. S. (2019). Sentieon DNASeq Variant Calling Workflow Demonstrates Strong Computational Performance and Accuracy. Frontiers in Genetics, 10, 736. https://doi.org/10.3389/fgene.2019.00736
  • " : "", params.aligner.equals("sentieon") ? "
  • Freed, D., Aldana, R., Weber, J. A., & Edwards, J. S. (2017). The Sentieon Genomics Tools—A fast and accurate solution to variant calling from next-generation sequence data (p. 115717). bioRxiv. https://doi.org/10.1101/115717
  • " : "" ] - variant_call_text = [ - params.variant_caller.equals("deepvariant") ? "
  • Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235
  • " : "", - params.variant_caller.equals("sentieon") ? "
  • Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556
  • " : "", - "
  • Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119
  • ", - "
  • Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110
  • ", - "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", - "
  • Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710
  • ", - "
  • Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081
  • ", - "
  • Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8
  • ", - ] - repeat_call_text = [ - "
  • Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431
  • ", - "
  • Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.7.1 (v0.7.1) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.4548873
  • " + repeats_text = [ + (!params.skip_repeat_calling && params.analysis_type.equals("wgs") ) ? "
  • Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431
  • " : "", + (!params.skip_repeat_annotation && params.analysis_type.equals("wgs") ) ? "
  • Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.7.1 (v0.7.1) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.4548873
  • " : "" ] if (!params.skip_snv_annotation) { snv_annotation_text = [ @@ -350,17 +351,35 @@ def toolBibliographyText() { "
  • Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016
  • ", "
  • Pedersen, B. S., Layer, R. M., & Quinlan, A. R. (2016). Vcfanno: Fast, flexible annotation of genetic variants. Genome Biology, 17(1), 118. https://doi.org/10.1186/s13059-016-0973-5
  • ", "
  • McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4
  • ", - "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • ", + "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • " ] } + if (!params.skip_snv_calling) { + snv_calls_text = [ + params.variant_caller.equals("deepvariant") ? "
  • Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235
  • " : "", + params.variant_caller.equals("sentieon") ? "
  • Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556
  • " : "", + params.run_mt_for_wes ? "
  • Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119
  • " : "", + "
  • Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081
  • " + ] + } + if (!params.skip_sv_annotation) { sv_annotation_text = [ "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", "
  • McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4
  • ", - "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • ", + "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • " ] } - if (!params.skip_mt_annotation) { + if (!params.skip_sv_calling) { + sv_calls_text = [ + params.analysis_type.equals("wgs") ? "
  • Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110
  • " : "", + params.analysis_type.equals("wgs") ? "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • " : "", + "
  • Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710
  • ", + "
  • Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8
  • " + ] + } + + if (!params.skip_mt_annotation && (params.analysis_type.equals("wgs") || params.run_mt_for_wes)) { mt_annotation_text = [ "
  • Rentzsch, P., Schubach, M., Shendure, J., & Kircher, M. (2021). CADD-Splice—Improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Medicine, 13(1), 31. https://doi.org/10.1186/s13073-021-00835-9
  • ", "
  • Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016
  • ", @@ -368,13 +387,19 @@ def toolBibliographyText() { "
  • McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4
  • ", "
  • Preste, R., Clima, R., & Attimonelli, M. (2019). Human mitochondrial variant annotation with HmtNote [Preprint]. Bioinformatics. https://doi.org/10.1101/600619
  • ", "
  • Weissensteiner, H., Pacher, D., Kloss-Brandstätter, A., Forer, L., Specht, G., Bandelt, H.-J., Kronenberg, F., Salas, A., & Schönherr, S. (2016). HaploGrep 2: Mitochondrial haplogroup classification in the era of high-throughput sequencing. Nucleic Acids Research, 44(W1), W58–W63. https://doi.org/10.1093/nar/gkw233
  • ", - "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • ", + "
  • Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142
  • " ] } - if (!params.skip_me_annotation) { + if (!params.skip_me_annotation && params.analysis_type.equals("wgs")) { me_annotation_text = [ "
  • McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4
  • ", + "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • " + ] + } + if (!params.skip_me_calling && params.analysis_type.equals("wgs")) { + me_calls_text = [ "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", + "
  • Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697
  • " ] } qc_bam_text = [ @@ -382,32 +407,30 @@ def toolBibliographyText() { "
  • Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: Advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 32(2), 292–294. https://doi.org/10.1093/bioinformatics/btv566
  • ", "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", "
  • Kent, W. J., Zweig, A. S., Barber, G., Hinrichs, A. S., & Karolchik, D. (2010). BigWig and BigBed: Enabling browsing of large distributed datasets. Bioinformatics, 26(17), 2204–2207. https://doi.org/10.1093/bioinformatics/btq351
  • ", - "
  • Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699
  • ", - ] - me_calls_text = [ - "
  • Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2
  • ", - "
  • Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697
  • ", + "
  • Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699
  • " ] preprocessing_text = [ "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  • ", - params.skip_fastp ? "" : "
  • Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107
  • ", + params.skip_fastp ? "" : "
  • Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107
  • " ] + other_citation_text = [ "
  • Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2), giab008. https://doi.org/10.1093/gigascience/giab008
  • ", "
  • McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky, A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010). The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research, 20(9), 1297–1303. https://doi.org/10.1101/gr.107524.110
  • ", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354
  • ", - params.skip_peddy ? "" : "
  • Pedersen, B. S., & Quinlan, A. R. (2017). Who’s Who? Detecting and Resolving Sample Anomalies in Human DNA Sequencing Studies with Peddy. The American Journal of Human Genetics, 100(3), 406–413. https://doi.org/10.1016/j.ajhg.2017.01.017
  • ", + params.skip_peddy ? "" : "
  • Pedersen, B. S., & Quinlan, A. R. (2017). Who’s Who? Detecting and Resolving Sample Anomalies in Human DNA Sequencing Studies with Peddy. The American Journal of Human Genetics, 100(3), 406–413. https://doi.org/10.1016/j.ajhg.2017.01.017
  • ", params.run_rtgvcfeval ? "
  • Cleary, J. G., Braithwaite, R., Gaastra, K., Hilbush, B. S., Inglis, S., Irvine, S. A., Jackson, A., Littin, R., Rathod, M., Ware, D., Zook, J. M., Trigg, L., & Vega, F. M. D. L. (2015). Comparing Variant Call Files for Performance Benchmarking of Next-Generation Sequencing Variant Calling Pipelines (p. 023754). bioRxiv. https://doi.org/10.1101/023754
  • " : "", "
  • Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., Durbin, R., & 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. https://doi.org/10.1093/bioinformatics/btp352
  • ", - "
  • Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0
  • ", - "
  • Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671
  • ", + (!params.skip_smncopynumbercaller && params.analysis_type.equals("wgs")) ? "
  • Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0
  • " : "", + "
  • Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671
  • " ] def concat_text = align_text + - variant_call_text + - repeat_call_text + + repeats_text + snv_annotation_text + + snv_calls_text + sv_annotation_text + + sv_calls_text + mt_annotation_text + qc_bam_text + me_calls_text + From 7962dfdd82dae4b655b3f1c9e0a60eaf8809e473 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 26 Jun 2024 22:06:56 +0200 Subject: [PATCH 046/113] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05c3d729..11960733 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Refactored tool citation list [#577](https://github.com/nf-core/raredisease/pull/577) - Removed `skip_repeat_analysis` added in #571 [#574](https://github.com/nf-core/raredisease/pull/574) - Remove several skip parameters that had been included in the pipeline to avoid failed CI tests (see parameters table below) [#574](https://github.com/nf-core/raredisease/pull/574) - `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. 
[#570](https://github.com/nf-core/raredisease/pull/570) From 3cff43a324dc991de1bdaad8d63d59d647b50c41 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 1 Jul 2024 12:18:21 +0200 Subject: [PATCH 047/113] move versions into conditionals --- subworkflows/local/call_structural_variants.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index e462e9d9..65f2f9ff 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -43,11 +43,13 @@ workflow CALL_STRUCTURAL_VARIANTS { .vcf .collect{it[1]} .set { tiddit_vcf } + ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions) CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) .vcf .collect{it[1]} .set { cnvnator_vcf } + ch_versions = ch_versions.mix(CALL_SV_CNVNATOR.out.versions) } if (!params.skip_germlinecnvcaller) { @@ -99,9 +101,7 @@ workflow CALL_STRUCTURAL_VARIANTS { TABIX_TABIX (SVDB_MERGE.out.vcf) - ch_versions = ch_versions.mix(CALL_SV_CNVNATOR.out.versions) ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions) - ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) ch_versions = ch_versions.mix(SVDB_MERGE.out.versions) From bcc23b2edfad4a92e5a21f0d0b064045af088bc8 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 1 Jul 2024 12:27:12 +0200 Subject: [PATCH 048/113] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11960733..baaa303b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Errors due to misplaced version statements 
[#578](https://github.com/nf-core/raredisease/pull/578) - Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576] - Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570) - Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563) From a8dc0aab1356c83398d36d0ef3b2bac28fce1ff6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:33:36 +0200 Subject: [PATCH 049/113] make bcftools roh skippable for cases without affected samples --- conf/modules/annotate_genome_snvs.config | 3 ++- docs/output.md | 6 +++--- subworkflows/local/annotate_genome_snvs.nf | 18 ++++++++++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config index 60bcd259..17a623dd 100644 --- a/conf/modules/annotate_genome_snvs.config +++ b/conf/modules/annotate_genome_snvs.config @@ -18,6 +18,7 @@ process { withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_ROH' { + ext.when = { meta.probands.size()>0 } ext.args = { "--samples ${meta.probands.unique().first()} --skip-indels " } ext.prefix = { "${meta.id}_roh" } } @@ -31,7 +32,7 @@ process { } withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' { - ext.prefix = { "${vcf.simpleName}_vcfanno" } + ext.prefix = { "${meta.prefix}_vcfanno" } } withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' { diff --git a/docs/output.md b/docs/output.md index 376acce9..046f26d1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,7 +68,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Hmtnote](#hmtnote) - [VEP](#vep-2) - [Filtering and ranking](#filtering-and-ranking) - - [Filter_vep](#filter_vep) + - [Filter\_vep](#filter_vep) - [GENMOD](#genmod) - [Mobile element analysis](#mobile-element-analysis) - [Calling mobile 
elements](#calling-mobile-elements) @@ -387,8 +387,8 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e Output files - `annotate_snv/genome` - - `_rhocall_vcfanno_filter_.vcf.gz`: file containing bcftools roh, vcfanno, cadd and vep annotations. - - `_rhocall_vcfanno_filter_.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, cadd and vep annotations. + - `__filter_.vcf.gz`: file containing bcftools roh, vcfanno, cadd and vep annotations. + - `__filter_.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, cadd and vep annotations.
    diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index e2f10a1e..8d8a5f0d 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -53,11 +53,21 @@ workflow ANNOTATE_GENOME_SNVS { ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf) - ZIP_TABIX_ROHCALL.out.gz_tbi - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_vcf_in } + ch_vcf + .join(ZIP_TABIX_ROHCALL.out.gz_tbi, remainder: true) + .branch { it -> + noroh: it[3].equals(null) + return [it[0] + [prefix: it[0].id], it[1], it[2]] + roh: !(it[3].equals(null)) + return [it[0] + [prefix: it[0].id + "_rhocall"], it[3], it[4]] + } + .set { ch_for_mix } + + ch_for_mix.noroh.mix(ch_for_mix.roh) + .map { meta, vcf, tbi -> return [meta, vcf, tbi, []] } + .set { ch_vcfanno_in } - VCFANNO (ch_vcf_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) + VCFANNO (ch_vcfanno_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) VCFANNO.out.vcf .map {meta, vcf -> From f87c76ed333bdab31b9536994feff9e0fd5c19c4 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:14:01 +0200 Subject: [PATCH 050/113] add warning message --- workflows/raredisease.nf | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index da7dd562..ee8d9dbd 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -541,8 +541,17 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) + ANN_CSQ_PLI_SNV.out.vcf_ann + .filter { it -> + if (it[0].probands.size()==0) { + log.warn("Skipping nuclear SNV ranking since no affected samples are detected in the case") + } + it[0].probands.size()>0 + } + .set {ch_ranksnv_nuclear_in} + RANK_VARIANTS_SNV ( - ANN_CSQ_PLI_SNV.out.vcf_ann, + ch_ranksnv_nuclear_in, ch_pedfile, ch_reduced_penetrance, 
ch_score_config_snv @@ -582,8 +591,17 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions) + ANN_CSQ_PLI_MT.out.vcf_ann + .filter { it -> + if (it[0].probands.size()==0) { + log.warn("Skipping mitochondrial SNV ranking since no affected samples are detected in the case") + } + it[0].probands.size()>0 + } + .set {ch_ranksnv_mt_in} + RANK_VARIANTS_MT ( - ANN_CSQ_PLI_MT.out.vcf_ann, + ch_ranksnv_mt_in, ch_pedfile, ch_reduced_penetrance, ch_score_config_mt @@ -648,8 +666,17 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) + ANN_CSQ_PLI_SV.out.vcf_ann + .filter { it -> + if (it[0].probands.size()==0) { + log.warn("Skipping SV ranking since no affected samples are detected in the case") + } + it[0].probands.size()>0 + } + .set {ch_ranksnv_sv_in} + RANK_VARIANTS_SV ( - ANN_CSQ_PLI_SV.out.vcf_ann, + ch_ranksnv_sv_in, ch_pedfile, ch_reduced_penetrance, ch_score_config_sv From 89f19954bb62848d34ab55aa63c00e092d2969a9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:19:54 +0200 Subject: [PATCH 051/113] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11960733..2257b5d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module[#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_sv_calling` to skip sv calling workflow 
[#572](https://github.com/nf-core/raredisease/pull/572) @@ -15,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Skip ROH calling with bcftools if there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Refactored tool citation list [#577](https://github.com/nf-core/raredisease/pull/577) - Removed `skip_repeat_analysis` added in #571 [#574](https://github.com/nf-core/raredisease/pull/574) - Remove several skip parameters that had been included in the pipeline to avoid failed CI tests (see parameters table below) [#574](https://github.com/nf-core/raredisease/pull/574) From cccad16767b9ee4b06838c52bdf61dfd55f7e520 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:34:36 +0200 Subject: [PATCH 052/113] prettier --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 046f26d1..508b115d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,7 +68,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Hmtnote](#hmtnote) - [VEP](#vep-2) - [Filtering and ranking](#filtering-and-ranking) - - [Filter\_vep](#filter_vep) + - [Filter_vep](#filter_vep) - [GENMOD](#genmod) - [Mobile element analysis](#mobile-element-analysis) - [Calling mobile elements](#calling-mobile-elements) From ae446ecef95637cebcfaf5e0d1633405964bc42b Mon Sep 17 00:00:00 2001 From: Anders Jemt Date: Thu, 4 Jul 2024 12:04:46 +0200 Subject: [PATCH 053/113] adds channels to meme mt align --- subworkflows/local/align.nf | 8 ++++++-- subworkflows/local/alignment/align_MT.nf | 20 +++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 2163767c..b2f91d27 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -100,7 +100,9 @@ workflow ALIGN { 
ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_dictionary, - ch_genome_fai + ch_genome_fai, + val_mbuffer_mem, + val_sort_threads ) ALIGN_MT_SHIFT ( @@ -111,7 +113,9 @@ workflow ALIGN { ch_mtshift_bwamemeindex, ch_mtshift_fasta, ch_mtshift_dictionary, - ch_mtshift_fai + ch_mtshift_fai, + val_mbuffer_mem, + val_sort_threads ) ch_mt_marked_bam = ALIGN_MT.out.marked_bam diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf index d2a9581d..14c65526 100644 --- a/subworkflows/local/alignment/align_MT.nf +++ b/subworkflows/local/alignment/align_MT.nf @@ -14,14 +14,16 @@ include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } fr workflow ALIGN_MT { take: - ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] - ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] - ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_bwamemeindex // channel: [mandatory for bwameme] [ val(meta), path(index) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] + ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] + ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] + ch_bwamemeindex // channel: [mandatory for bwameme] [ val(meta), path(index) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_mbuffer_mem // integer: [mandatory] default: 3072 + val_sort_threads // integer: [mandatory] default: 4 main: ch_versions = Channel.empty() @@ -39,7 +41,7 @@ workflow ALIGN_MT { ch_align = BWA_MEM_MT.out.bam 
ch_versions = ch_versions.mix(BWA_MEM_MT.out.versions.first()) } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM_MT (ch_fastq, ch_bwamemeindex, ch_fasta, true) + BWAMEME_MEM_MT (ch_fastq, ch_bwamemeindex, ch_fasta, true, val_mbuffer_mem, val_sort_threads) ch_align = BWAMEME_MEM_MT.out.bam ch_versions = ch_versions.mix(BWAMEME_MEM_MT.out.versions.first()) } From 11324e98b20edbb520567e7a934ff5c87a8bcc5c Mon Sep 17 00:00:00 2001 From: Eirini Liampa Date: Thu, 25 Jul 2024 14:49:05 +0200 Subject: [PATCH 054/113] fix: added publishDir mode in raredisease.config --- conf/modules/raredisease.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index 4499800d..5298800d 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -21,6 +21,7 @@ process { ext.args = '--quiet --dir ./' publishDir = [ path: { "${params.outdir}/fastqc/${meta.id}" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } @@ -67,6 +68,7 @@ process { withName: '.*RAREDISEASE:CREATE_PEDIGREE_FILE' { publishDir = [ path: { "${params.outdir}/pedigree" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } From 5f7deeb1ce70c9baa9286fecd2dab280669abe86 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Aug 2024 13:37:44 +0200 Subject: [PATCH 055/113] scatter count --- conf/modules/annotate_genome_snvs.config | 13 +-- conf/modules/scatter_genome.config | 2 +- nextflow.config | 1 + nextflow_schema.json | 6 ++ subworkflows/local/annotate_genome_snvs.nf | 102 +++++++++--------- subworkflows/local/annotate_mt_snvs.nf | 6 +- .../local/annotation/annotate_cadd.nf | 6 +- workflows/raredisease.nf | 1 + 8 files changed, 70 insertions(+), 67 deletions(-) diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config index 17a623dd..fe8d807f 100644 --- a/conf/modules/annotate_genome_snvs.config +++ b/conf/modules/annotate_genome_snvs.config @@ -31,13 +31,18 @@ process { ext.prefix = { "${input.simpleName}" } } + withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' { + ext.prefix = { "${vcf.simpleName}_${intervals.simpleName}" } + } + withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' { - ext.prefix = { "${meta.prefix}_vcfanno" } + ext.prefix = { "${meta.prefix}_vcfanno_${meta.scatterid}" } } withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' { ext.prefix = { "${vcf.simpleName}_upd_sites" } ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"} + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } } withName: '.*ANNOTATE_GENOME_SNVS:UPD_REGIONS' { @@ -74,14 +79,10 @@ process { } withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_VIEW' { - ext.prefix = { "${vcf.simpleName}_filter" } + ext.prefix = { "${meta.prefix}_filter_${meta.scatterid}" } ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' } } - withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' { - ext.prefix = { "${vcf.simpleName}_${intervals.simpleName}" } 
- } - withName: '.*ANNOTATE_GENOME_SNVS:ENSEMBLVEP_SNV' { ext.prefix = { "${meta.prefix}_vep_${meta.scatterid}" } ext.args = { [ diff --git a/conf/modules/scatter_genome.config b/conf/modules/scatter_genome.config index b2fe363a..07c3602e 100644 --- a/conf/modules/scatter_genome.config +++ b/conf/modules/scatter_genome.config @@ -21,7 +21,7 @@ process { } withName: '.*SCATTER_GENOME:GATK4_SPLITINTERVALS' { - ext.args = { "--subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION --scatter-count 22" } + ext.args = { "--scatter-count ${params.scatter_count}" } ext.when = { !params.skip_snv_annotation } ext.prefix = { "${meta.id}_genome_intervals" } publishDir = [ diff --git a/nextflow.config b/nextflow.config index 0c76f47e..b7857df5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,7 @@ params { run_mt_for_wes = false run_rtgvcfeval = false save_mapped_as_cram = false + scatter_count = 20 skip_fastp = false skip_gens = true skip_germlinecnvcaller = false diff --git a/nextflow_schema.json b/nextflow_schema.json index e8164492..81ef4683 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -508,6 +508,12 @@ "description": "Specifies whether to generate and publish alignment files as cram instead of bam", "fa_icon": "fas fa-toggle-on" }, + "scatter_count": { + "type": "integer", + "default": 20, + "description": "Number of intervals to split your genome into (used to parallelize annotations)", + "fa_icon": "fas fa-less-than" + }, "skip_fastp": { "type": "boolean", "description": "Specifies whether or not to skip trimming with fastp.", diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index 8d8a5f0d..8de4a526 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -55,80 +55,55 @@ workflow ANNOTATE_GENOME_SNVS { ch_vcf .join(ZIP_TABIX_ROHCALL.out.gz_tbi, remainder: true) - .branch { it -> - noroh: it[3].equals(null) - return [it[0] + [prefix: 
it[0].id], it[1], it[2]] - roh: !(it[3].equals(null)) - return [it[0] + [prefix: it[0].id + "_rhocall"], it[3], it[4]] + .combine(ch_split_intervals) + .map { it -> + if (it[3].equals(null)) { + return [it[0] + [prefix: it[0].id, scatterid:it[4].baseName], it[1], it[2], it[4]] + } else { + return [it[0] + [prefix: it[0].id + "_rhocall", scatterid:it[5].baseName], it[3], it[4], it[5]] + } } - .set { ch_for_mix } + .set { ch_vcf_scatter_in } + + GATK4_SELECTVARIANTS (ch_vcf_scatter_in) - ch_for_mix.noroh.mix(ch_for_mix.roh) + GATK4_SELECTVARIANTS.out.vcf + .join(GATK4_SELECTVARIANTS.out.tbi) .map { meta, vcf, tbi -> return [meta, vcf, tbi, []] } .set { ch_vcfanno_in } VCFANNO (ch_vcfanno_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) - VCFANNO.out.vcf - .map {meta, vcf -> - def splitchannels = [] - for (int i=0; i< meta.upd_children.size(); i++) { - upd_sample = meta.upd_children[i] - new_meta = meta + [upd_child:upd_sample] - splitchannels.add([new_meta,vcf]) - } - return splitchannels - } - .flatten() - .buffer (size: 2) - .set { ch_upd_in } - - UPD_SITES(ch_upd_in) - UPD_REGIONS(ch_upd_in) - CHROMOGRAPH_SITES([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_SITES.out.bed) - CHROMOGRAPH_REGIONS([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_REGIONS.out.bed, [[],[]]) - ZIP_TABIX_VCFANNO (VCFANNO.out.vcf) - //rhocall_viz - ANNOTATE_RHOCALLVIZ(ZIP_TABIX_VCFANNO.out.gz_tbi, ch_samples, ch_genome_chrsizes) - BCFTOOLS_VIEW(ZIP_TABIX_VCFANNO.out.gz_tbi, [], [], []) // filter on frequencies - TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf) - - BCFTOOLS_VIEW.out.vcf - .join(TABIX_BCFTOOLS_VIEW.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .collect() - .combine(ch_split_intervals) - .map { - meta, vcf, tbi, interval -> - return [meta + [scatterid:interval.baseName, prefix: vcf.simpleName], vcf, tbi, interval] - } - .set { ch_vcf_scatter_in } - - GATK4_SELECTVARIANTS (ch_vcf_scatter_in) - // Annotating with CADD if (params.cadd_resources != null) { 
+ TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf) + + BCFTOOLS_VIEW.out.vcf + .join(TABIX_BCFTOOLS_VIEW.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_cadd_in } + ANNOTATE_CADD ( - GATK4_SELECTVARIANTS.out.vcf, - GATK4_SELECTVARIANTS.out.tbi, + ch_cadd_in, ch_cadd_header, ch_cadd_resources ) ch_cadd_vcf = ANNOTATE_CADD.out.vcf ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) + ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions) } // If CADD is run, pick CADD output as input for VEP else pass selectvariants output to VEP. - GATK4_SELECTVARIANTS.out.vcf + BCFTOOLS_VIEW.out.vcf .join(ch_cadd_vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null` .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] selvar: it[2].equals(null) - return [it[0], it[1]] + return [it[0] + [prefix: it[0].prefix + "_filter"], it[1]] cadd: !(it[2].equals(null)) - return [it[0] + [prefix: it[0].prefix + "_cadd"], it[2]] + return [it[0] + [prefix: it[0].prefix + "_filter_cadd"], it[2]] } .set { ch_for_mix } @@ -136,7 +111,6 @@ workflow ANNOTATE_GENOME_SNVS { .map { meta, vcf -> return [meta, vcf, []] } .set { ch_vep_in } - // Annotating with ensembl Vep ENSEMBLVEP_SNV( ch_vep_in, @@ -166,14 +140,37 @@ workflow ANNOTATE_GENOME_SNVS { BCFTOOLS_CONCAT (ch_concat_in) + BCFTOOLS_CONCAT.out.vcf + .map {meta, vcf -> + def splitchannels = [] + for (int i=0; i< meta.upd_children.size(); i++) { + upd_sample = meta.upd_children[i] + new_meta = meta + [upd_child:upd_sample, prefix: meta.prefix + "_vcfanno"] + splitchannels.add([new_meta,vcf]) + } + return splitchannels + } + .flatten() + .buffer (size: 2) + .set { ch_upd_in } + + UPD_SITES(ch_upd_in) + UPD_REGIONS(ch_upd_in) + CHROMOGRAPH_SITES([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_SITES.out.bed) + CHROMOGRAPH_REGIONS([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_REGIONS.out.bed, [[],[]]) + + 
BCFTOOLS_CONCAT.out.vcf .map { meta, vcf -> [meta - meta.subMap('prefix'), vcf] } .set { ch_concat_out } TABIX_BCFTOOLS_CONCAT (ch_concat_out) - ch_vep_ann = ch_concat_out - ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi + ch_vep_ann = ch_concat_out + ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi + ch_vep_ann_index = ch_concat_out.join(TABIX_BCFTOOLS_CONCAT.out.tbi) + //rhocall_viz + ANNOTATE_RHOCALLVIZ(ch_vep_ann_index, ch_samples, ch_genome_chrsizes) ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions) ch_versions = ch_versions.mix(RHOCALL_ANNOTATE.out.versions) @@ -185,13 +182,12 @@ workflow ANNOTATE_GENOME_SNVS { ch_versions = ch_versions.mix(CHROMOGRAPH_REGIONS.out.versions) ch_versions = ch_versions.mix(ZIP_TABIX_VCFANNO.out.versions) ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions) - ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions) ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions.first()) ch_versions = ch_versions.mix(ENSEMBLVEP_SNV.out.versions.first()) ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) ch_versions = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions) - ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) + //ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) emit: vcf_ann = ch_vep_ann // channel: [ val(meta), path(vcf) ] diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index 02554461..aab01ef6 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -19,6 +19,7 @@ workflow ANNOTATE_MT_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] ch_vcfanno_toml // channel: 
[mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 @@ -47,7 +48,7 @@ workflow ANNOTATE_MT_SNVS { .map { meta, vcf, tbi -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, []]} .set { ch_in_vcfanno } - VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources) + VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) ZIP_TABIX_VCFANNO_MT(VCFANNO_MT.out.vcf) ch_vcfanno_vcf = ZIP_TABIX_VCFANNO_MT.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf]} @@ -56,8 +57,7 @@ workflow ANNOTATE_MT_SNVS { // Annotating with CADD if (params.cadd_resources != null) { ANNOTATE_CADD ( - ch_vcfanno_vcf, - ch_vcfanno_tbi, + ZIP_TABIX_VCFANNO_MT.out.gz_tbi, ch_cadd_header, ch_cadd_resources ) diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf index e471dd21..12c596a7 100644 --- a/subworkflows/local/annotation/annotate_cadd.nf +++ b/subworkflows/local/annotation/annotate_cadd.nf @@ -12,15 +12,14 @@ include { TABIX_TABIX as TABIX_VIEW } from '../../../modules/nf-core/tabix/t workflow ANNOTATE_CADD { take: - ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ] - ch_index // channel: [mandatory] [ val(meta), path(tbis) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ] ch_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(dir) ] main: ch_versions = Channel.empty() - BCFTOOLS_VIEW(ch_vcf.join(ch_index), [], [], []) + BCFTOOLS_VIEW(ch_vcf, [], [], []) TABIX_VIEW(BCFTOOLS_VIEW.out.vcf) @@ -29,7 +28,6 @@ workflow ANNOTATE_CADD { TABIX_CADD(CADD.out.tsv) ch_vcf - .join(ch_index) .join(CADD.out.tsv) .join(TABIX_CADD.out.tbi) .combine(ch_header) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ee8d9dbd..10a7e6ae 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -570,6 +570,7 @@ workflow RAREDISEASE { ch_cadd_header, ch_cadd_resources, ch_genome_fasta, + 
ch_vcfanno_lua, ch_vcfanno_resources, ch_vcfanno_toml, params.genome, From 8179ce9a5d4e47da99cf89380c30e8b5e5c9914e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Aug 2024 16:12:41 +0200 Subject: [PATCH 056/113] Changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8481d2da..adced3d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) - Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module[#574](https://github.com/nf-core/raredisease/pull/574) @@ -16,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585) - Skip ROH calling with bcftools if there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Refactored tool citation list [#577](https://github.com/nf-core/raredisease/pull/577) - Removed `skip_repeat_analysis` added in #571 [#574](https://github.com/nf-core/raredisease/pull/574) @@ -45,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | skip_qualimap | | | | skip_smncopynumbercaller | | | skip_repeat_annotation | +| | scatter_count | ## 2.1.0 - Obelix [2024-05-29] From 8c452ff90392edca59b6d10d2f5e8c9183004941 
Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:51:58 +0200 Subject: [PATCH 057/113] Update annotate_genome_snvs.nf --- subworkflows/local/annotate_genome_snvs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index 8de4a526..8ae123f7 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -187,7 +187,7 @@ workflow ANNOTATE_GENOME_SNVS { ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) ch_versions = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions) - //ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) + ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) emit: vcf_ann = ch_vep_ann // channel: [ val(meta), path(vcf) ] From 9546ba23c98a52903e1d9bf28250335f5e260416 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:48:32 +0200 Subject: [PATCH 058/113] fix [skip ci] --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 10a7e6ae..082dcc26 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -331,7 +331,7 @@ workflow RAREDISEASE { if (params.analysis_type.equals("wgs")) { ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { - ch_svcaller_priority = Channel.value(["manta"]) + ch_svcaller_priority = Channel.value([]) } } else { if (params.analysis_type.equals("wgs")) { From ec39dd1c1534ffbf565d5508db00f096bea55962 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 7 Aug 2024 17:17:40 +0200 Subject: [PATCH 059/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 
insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index adced3d8..642b1dd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Error in SVDB merge when only a single SV caller is run [#586](https://github.com/nf-core/raredisease/pull/586) - Errors due to misplaced version statements [#578](https://github.com/nf-core/raredisease/pull/578) - Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576] - Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570) From b175df13d99a0ab9d436e13d33f3f33efcd1c0ad Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 8 Aug 2024 08:14:00 +0200 Subject: [PATCH 060/113] update module --- modules.json | 2 +- modules/nf-core/deepvariant/README.md | 4 +- modules/nf-core/deepvariant/main.nf | 18 +- .../deepvariant/tests/main.nf.test.snap | 520 +++++++++--------- 4 files changed, 277 insertions(+), 267 deletions(-) diff --git a/modules.json b/modules.json index 96664577..9fe5ae84 100644 --- a/modules.json +++ b/modules.json @@ -113,7 +113,7 @@ }, "deepvariant": { "branch": "master", - "git_sha": "199ba086a259e1933d6e0ab7596e4a977bbd483a", + "git_sha": "ccf06a896339d2a4ed8594daa9f9adb827cb8189", "installed_by": ["modules"] }, "eklipse": { diff --git a/modules/nf-core/deepvariant/README.md b/modules/nf-core/deepvariant/README.md index ca112a7d..9d1ceb34 100644 --- a/modules/nf-core/deepvariant/README.md +++ b/modules/nf-core/deepvariant/README.md @@ -1,6 +1,8 @@ # Conda is not supported at the moment -The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected +The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected. 
+ +See https://github.com/bioconda/bioconda-recipes/issues/30310 and https://github.com/nf-core/modules/issues/1754 for more information. Hence, we are using the docker container provided by the authors of the tool: diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index 507b6c11..a560cbe9 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -2,8 +2,10 @@ process DEEPVARIANT { tag "$meta.id" label 'process_high' - //Conda is not supported at the moment - container "nf-core/deepvariant:1.5.0" + // FIXME Conda is not supported at the moment + // BUG https://github.com/nf-core/modules/issues/1754 + // BUG https://github.com/bioconda/bioconda-recipes/issues/30310 + container "nf-core/deepvariant:1.6.1" input: tuple val(meta), path(input), path(index), path(intervals) @@ -29,6 +31,9 @@ process DEEPVARIANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions = intervals ? "--regions=${intervals}" : "" + // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 + // FIXME Revert this on next version bump + def VERSION = '1.6.1' """ /opt/deepvariant/bin/run_deepvariant \\ @@ -38,12 +43,12 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ - --intermediate_results_dir=. \\ + --intermediate_results_dir=tmp \\ --num_shards=${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": - deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + deepvariant: $VERSION END_VERSIONS """ @@ -53,6 +58,9 @@ process DEEPVARIANT { error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
} prefix = task.ext.prefix ?: "${meta.id}" + // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 + // FIXME Revert this on next version bump + def VERSION = '1.6.1' """ touch ${prefix}.vcf.gz touch ${prefix}.vcf.gz.tbi @@ -61,7 +69,7 @@ process DEEPVARIANT { cat <<-END_VERSIONS > versions.yml "${task.process}": - deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + deepvariant: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap index 6ad76ae4..c49f7e4d 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test.snap +++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap @@ -1,269 +1,269 @@ { - "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": { - "content": [ - { - "0": [ - [ + "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:09:40.987117305" }, - "timestamp": "2024-03-20T13:54:42.757335334" - }, - "homo_sapiens - [bam, bai] - fasta - fai": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ + "homo_sapiens - [bam, bai] - fasta - fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + 
"versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:08:47.058887374" }, - "timestamp": "2024-03-20T13:54:18.409489045" - }, - "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "1": [ - [ + "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": { + "content": [ { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } ], - "2": [ - [ - { - "id": 
"test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "4": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ], - "gvcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952" - ] - ], - "gvcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a" - ] - ], - "vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4" - ] - ], - "vcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217" - ] - ], - "versions": [ - "versions.yml:md5,4678f778b58276933b165fe3e84afc6a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T13:54:30.523871801" - } -} + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-01T12:09:13.952808655" + } +} \ No newline at end of file From ec16d8d2b35f4fd8100422d57f01b71b208d7b61 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 8 Aug 2024 09:09:42 +0200 Subject: [PATCH 061/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 642b1dd5..c7e97c9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587) - Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585) - Skip ROH calling with bcftools if there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Refactored tool 
citation list [#577](https://github.com/nf-core/raredisease/pull/577) From 9542a38729465fa5b194a5a23a58f33619ba0315 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:42:10 +0200 Subject: [PATCH 062/113] add extra resource --- main.nf | 1 + nextflow_schema.json | 8 +++++ subworkflows/local/annotate_genome_snvs.nf | 5 +-- subworkflows/local/annotate_mt_snvs.nf | 9 ++++-- subworkflows/local/prepare_references.nf | 19 +++++++++--- workflows/raredisease.nf | 36 +++++++++++++--------- 6 files changed, 53 insertions(+), 25 deletions(-) diff --git a/main.nf b/main.nf index 838d75d7..96fced9c 100644 --- a/main.nf +++ b/main.nf @@ -58,6 +58,7 @@ params.variant_consequences_sv = getGenomeAttribute('variant_consequence params.vep_filters = getGenomeAttribute('vep_filters') params.vep_filters_scout_fmt = getGenomeAttribute('vep_filters_scout_fmt') params.vcf2cytosure_blacklist = getGenomeAttribute('vcf2cytosure_blacklist') +params.vcfanno_extra_resources = getGenomeAttribute('vcfanno_extra_resources') params.vcfanno_resources = getGenomeAttribute('vcfanno_resources') params.vcfanno_toml = getGenomeAttribute('vcfanno_toml') params.vcfanno_lua = getGenomeAttribute('vcfanno_lua') diff --git a/nextflow_schema.json b/nextflow_schema.json index 81ef4683..36905fab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -402,6 +402,14 @@ "fa_icon": "fas fa-file", "description": "Path to vcf2cytosure blacklist file" }, + "vcfanno_extra_resources": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to a VCF file containing annotations.", + "help_text": "can be used to supply case-specific annotations in addition to those provided using --vcfanno_resources", + "fa_icon": "fas fa-file" + }, "vcfanno_resources": { "type": "string", "exists": true, diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index 8ae123f7..c84828e6 
100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -28,7 +28,8 @@ workflow ANNOTATE_GENOME_SNVS { analysis_type // string: [mandatory] 'wgs' or 'wes' ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_extra // channel: [mandatory] [ path(resources & indices) ] + ch_vcfanno_resources // channel: [mandatory] [ path(resources & indices) ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 @@ -69,7 +70,7 @@ workflow ANNOTATE_GENOME_SNVS { GATK4_SELECTVARIANTS.out.vcf .join(GATK4_SELECTVARIANTS.out.tbi) - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []] } + .combine(ch_vcfanno_extra) .set { ch_vcfanno_in } VCFANNO (ch_vcfanno_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index aab01ef6..f83aa69b 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -19,8 +19,9 @@ workflow ANNOTATE_MT_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_extra // channel: [mandatory] [ path(resources & indices) ] + ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] + ch_vcfanno_resources // channel: [mandatory] [ path(resources & indices) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] 107 @@ -45,7 +46,9 @@ workflow ANNOTATE_MT_SNVS { // Vcfanno 
ZIP_TABIX_HMTNOTE_MT.out.gz_tbi - .map { meta, vcf, tbi -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, []]} + .combine(ch_vcfanno_extra) + .map { meta, vcf, tbi, resources -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, resources]} + .dump (tag:'test') .set { ch_in_vcfanno } VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 7c7726b4..f7e1849f 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -22,6 +22,7 @@ include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modul include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main' include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_GNOMAD_AF } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_PT } from '../../modules/nf-core/tabix/tabix/main' @@ -36,14 +37,16 @@ workflow PREPARE_REFERENCES { ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] ch_target_bed // channel: [mandatory for WES] [ path(bed) ] + ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ] ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] main: - ch_versions = Channel.empty() - ch_tbi = Channel.empty() - ch_bgzip_tbi = Channel.empty() - ch_bwa = Channel.empty() - ch_sentieonbwa = Channel.empty() + ch_versions = 
Channel.empty() + ch_tbi = Channel.empty() + ch_bgzip_tbi = Channel.empty() + ch_bwa = Channel.empty() + ch_sentieonbwa = Channel.empty() + ch_vcfanno_extra = Channel.empty() // Genome indices SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) @@ -87,6 +90,10 @@ workflow PREPARE_REFERENCES { TABIX_GNOMAD_AF(ch_gnomad_af_tab) TABIX_PT(ch_target_bed).tbi.set { ch_tbi } TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } + TABIX_VCFANNOEXTRA(ch_vcfanno_extra_unprocessed) + .gz_tbi + .map { meta, vcf, tbi -> return [[vcf,tbi]] } + .set {ch_vcfanno_extra} // Generate bait and target intervals GATK_BILT(ch_target_bed, ch_dict).interval_list @@ -125,6 +132,7 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions) ch_versions = ch_versions.mix(TABIX_PT.out.versions) ch_versions = ch_versions.mix(TABIX_PBT.out.versions) + ch_versions = ch_versions.mix(TABIX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) ch_versions = ch_versions.mix(GATK_BILT.out.versions) ch_versions = ch_versions.mix(GATK_ILT.out.versions) @@ -153,6 +161,7 @@ workflow PREPARE_REFERENCES { gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ path(intervals) ] bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 082dcc26..864eaf0c 100644 --- a/workflows/raredisease.nf +++ 
b/workflows/raredisease.nf @@ -188,21 +188,23 @@ workflow RAREDISEASE { // // Initialize file channels for PREPARE_REFERENCES subworkflow // - ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([[],[]]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) + ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? 
Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vcfanno_extra_unprocessed = params.vcfanno_extra_resources ? Channel.fromPath(params.vcfanno_extra_resources).map { it -> [[id:it.baseName], it] }.collect() + : Channel.empty() + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) // // Prepare references and indices. @@ -215,6 +217,7 @@ workflow RAREDISEASE { ch_gnomad_af_tab, ch_dbsnp, ch_target_bed_unprocessed, + ch_vcfanno_extra_unprocessed, ch_vep_cache_unprocessed ) .set { ch_references } @@ -306,6 +309,7 @@ workflow RAREDISEASE { : Channel.value([]) ch_variant_consequences_sv = params.variant_consequences_sv ? Channel.fromPath(params.variant_consequences_sv).collect() : Channel.value([]) + ch_vcfanno_extra = ch_references.vcfanno_extra ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() : Channel.value([]) ch_vcf2cytosure_blacklist = params.vcf2cytosure_blacklist ? 
Channel.fromPath(params.vcf2cytosure_blacklist).collect() @@ -514,6 +518,7 @@ workflow RAREDISEASE { params.analysis_type, ch_cadd_header, ch_cadd_resources, + ch_vcfanno_extra, ch_vcfanno_resources, ch_vcfanno_lua, ch_vcfanno_toml, @@ -570,6 +575,7 @@ workflow RAREDISEASE { ch_cadd_header, ch_cadd_resources, ch_genome_fasta, + ch_vcfanno_extra, ch_vcfanno_lua, ch_vcfanno_resources, ch_vcfanno_toml, From 7f3e64863ff58b4504fdd8dfac5b4bab8fa99220 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:05:44 +0200 Subject: [PATCH 063/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 642b1dd5..b3e7ad68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) - Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) From 05707c025a4048dbfa89241b9fb197fb2cc22057 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:38:39 +0200 Subject: [PATCH 064/113] Update nextflow_schema.json Co-authored-by: Anders Jemt --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 36905fab..f2c79a4f 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -407,7 +407,7 @@ "exists": true, "format": "file-path", "description": "Path to a VCF file containing annotations.", - "help_text": "can be used to supply case-specific annotations in addition to those provided using --vcfanno_resources", + "help_text": "Can be used to supply case-specific annotations in addition to those provided using --vcfanno_resources", "fa_icon": "fas fa-file" }, "vcfanno_resources": { From d1894b58c5e658a2f96d8ece20e3da88e4f45337 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:40:30 +0200 Subject: [PATCH 065/113] Update annotate_mt_snvs.nf --- subworkflows/local/annotate_mt_snvs.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index f83aa69b..bb5dca6b 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -19,9 +19,9 @@ workflow ANNOTATE_MT_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_vcfanno_extra // channel: [mandatory] [ path(resources & indices) ] + ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index).....] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources & indices) ] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),.....] 
] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] 107 @@ -48,7 +48,6 @@ workflow ANNOTATE_MT_SNVS { ZIP_TABIX_HMTNOTE_MT.out.gz_tbi .combine(ch_vcfanno_extra) .map { meta, vcf, tbi, resources -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, resources]} - .dump (tag:'test') .set { ch_in_vcfanno } VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources) From 5bd184a634cd1454a76ca01c29a84316704edade Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:41:54 +0200 Subject: [PATCH 066/113] Update prepare_references.nf --- subworkflows/local/prepare_references.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index f7e1849f..7f5eb05c 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -93,6 +93,7 @@ workflow PREPARE_REFERENCES { TABIX_VCFANNOEXTRA(ch_vcfanno_extra_unprocessed) .gz_tbi .map { meta, vcf, tbi -> return [[vcf,tbi]] } + .collect() .set {ch_vcfanno_extra} // Generate bait and target intervals From 6bbc9274482fc2fc0030db853ac3de32f5818ba4 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:43:09 +0200 Subject: [PATCH 067/113] Update annotate_genome_snvs.nf --- subworkflows/local/annotate_genome_snvs.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index c84828e6..51d5a64c 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -28,8 +28,8 @@ workflow ANNOTATE_GENOME_SNVS { analysis_type // string: [mandatory] 'wgs' or 'wes' ch_cadd_header // channel: [mandatory] [ path(txt) ] 
ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_vcfanno_extra // channel: [mandatory] [ path(resources & indices) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources & indices) ] + ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),...] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 From 99805a4862e95407ef2ed6eb8fe36552ffb77c3f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:43:58 +0200 Subject: [PATCH 068/113] Update prepare_references.nf --- subworkflows/local/prepare_references.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 7f5eb05c..2a6cbc0a 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -162,7 +162,7 @@ workflow PREPARE_REFERENCES { gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] - vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ path(intervals) ] + vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ path(intervals) ] bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] From 08302deb13e94588b9598bd213a42e3e3828467b Mon Sep 17 00:00:00 2001 From: 
Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:32:08 +0200 Subject: [PATCH 069/113] Update prepare_references.nf --- subworkflows/local/prepare_references.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 2a6cbc0a..dd502db2 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -162,7 +162,7 @@ workflow PREPARE_REFERENCES { gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] - vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ path(intervals) ] + vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ] bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] From ad9a16fbecc50e6759a47bfcc6b0edca9613d244 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 9 Aug 2024 16:15:46 +0200 Subject: [PATCH 070/113] add feature --- CHANGELOG.md | 2 ++ subworkflows/local/prepare_references.nf | 28 +++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 140d2a28..2c4667bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new functionality to pass gzipped resources to vcfanno_extra_resources 
[#589](https://github.com/nf-core/raredisease/pull/589) - A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) - Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) @@ -51,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | skip_smncopynumbercaller | | | skip_repeat_annotation | | | scatter_count | +| | vcfanno_extra_resources | ## 2.1.0 - Obelix [2024-05-29] diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index dd502db2..4484bcd0 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -22,7 +22,8 @@ include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modul include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main' include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main' include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_TABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_GNOMAD_AF } from '../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_PT } from '../../modules/nf-core/tabix/tabix/main' @@ -47,6 +48,8 @@ workflow 
PREPARE_REFERENCES { ch_bwa = Channel.empty() ch_sentieonbwa = Channel.empty() ch_vcfanno_extra = Channel.empty() + ch_vcfanno_bgzip = Channel.empty() + ch_vcfanno_index = Channel.empty() // Genome indices SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) @@ -90,12 +93,26 @@ workflow PREPARE_REFERENCES { TABIX_GNOMAD_AF(ch_gnomad_af_tab) TABIX_PT(ch_target_bed).tbi.set { ch_tbi } TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } - TABIX_VCFANNOEXTRA(ch_vcfanno_extra_unprocessed) - .gz_tbi + ch_vcfanno_extra_unprocessed + .branch { it -> + bgzipindex: !it[1].toString().endsWith(".gz") + index: it[1].toString().endsWith(".gz") + } + .set { ch_vcfanno_tabix_in } + + TABIX_VCFANNOEXTRA(ch_vcfanno_tabix_in.index).tbi + .join(ch_vcfanno_tabix_in.index) + .map { meta, tbi, vcf -> return [[vcf,tbi]]} + .set {ch_vcfanno_index} + + TABIX_BGZIPINDEX_VCFANNOEXTRA(ch_vcfanno_tabix_in.bgzipindex).gz_tbi .map { meta, vcf, tbi -> return [[vcf,tbi]] } - .collect() - .set {ch_vcfanno_extra} + .set {ch_vcfanno_bgzip} + Channel.empty() + .mix(ch_vcfanno_bgzip, ch_vcfanno_index) + .collect() + .set{ch_vcfanno_extra} // Generate bait and target intervals GATK_BILT(ch_target_bed, ch_dict).interval_list GATK_ILT(GATK_BILT.out.interval_list) @@ -133,6 +150,7 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions) ch_versions = ch_versions.mix(TABIX_PT.out.versions) ch_versions = ch_versions.mix(TABIX_PBT.out.versions) + ch_versions = ch_versions.mix(TABIX_BGZIPINDEX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_VCFANNOEXTRA.out.versions) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) ch_versions = ch_versions.mix(GATK_BILT.out.versions) From 6dd5f0a3befd94ee658ef1845011338065fe2f70 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:38:56 +0200 Subject: [PATCH 071/113] Corrects usage docs --- docs/usage.md | 65 
++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 66d0b357..259531ba 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. 
Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -228,16 +228,17 @@ The mandatory and optional parameters for each category are tabulated below. ##### 7. SNV annotation & Ranking -| Mandatory | Optional | -| ------------------------------------ | --------------------------------------------- | -| genome1 | reduced_penetrance8 | -| vcfanno_resources2 | vcfanno_lua | -| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt9 | -| vep_cache_version | cadd_resources10 | -| vep_cache4 | vep_plugin_files11 | -| gnomad_af5 | | -| score_config_snv6 | | -| variant_consequences_snv7 | | +| Mandatory | Optional | +| ------------------------------------ | ---------------------------------------------- | +| genome1 | reduced_penetrance9 | +| vcfanno_resources2 | vcfanno_lua | +| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt10 | +| vep_cache_version | cadd_resources11 | +| vep_cache4 | | +| gnomad_af5 | | +| score_config_snv6 | | +| variant_consequences_snv7 | | +| vep_plugin_files8 | | 1Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.
    2Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).
    @@ -249,10 +250,10 @@ See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/gnomad_reformated.tab.gz).
    6Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini).
    7File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html). -8Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
    -9 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into canditate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.
    -10Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
    -11A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv).
    +8A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv).
    +9Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
    +10 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into candidate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.
    +11Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
    :::note We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (download files listed under the description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>") From 0c5b700661186023c11db65f5e32658d11fc4b5b Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:41:30 +0200 Subject: [PATCH 072/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4667bd..5d009cda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Update docs to show 'vep_plugin_files' as a mandatory parameter for SNV annotation [#594](https://github.com/nf-core/raredisease/issues/593) - Error in SVDB merge when only a single SV caller is run [#586](https://github.com/nf-core/raredisease/pull/586) - Errors due to misplaced version statements [#578](https://github.com/nf-core/raredisease/pull/578) - Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576] From 151e909a8d9516faf325150fc26bdc25aeda3292 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:44:23 +0200 Subject: [PATCH 073/113] fix lint --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 259531ba..5a110ad1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the 
pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. 
Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From f63d810e4ef8536f9e15cd648e337e27661f12e2 Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 14 Aug 2024 11:14:53 +0200 Subject: [PATCH 074/113] makes 0 an allowed value for sex --- CHANGELOG.md | 1 + assets/schema_input.json | 2 +- docs/usage.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4667bd..35d0b5f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595) - Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587) - Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585) - Skip ROH calling with bcftools if there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) diff --git a/assets/schema_input.json b/assets/schema_input.json index 38249daa..3dddc6a4 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -42,7 +42,7 @@ "sex": { "type": "string", "meta": ["sex"], - "enum": ["1", "2", "other"], + "enum": ["1", "2", "0", "other"], "errorMessage": "Sex must be provided and cannot contain spaces" }, "phenotype": { diff --git a/docs/usage.md b/docs/usage.md index 66d0b357..f3c2bf0f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,7 +108,7 @@ nf-core/raredisease will auto-detect whether a
sample is single- or paired-end u | `lane` | Used to generate separate channels during the alignment step. | | `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `sex` | Sex (1=male; 2=female; other=unknown). | +| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or other). | | `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | | `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. | | `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. | From 3185adefa80425bb51f71e755b1d0f5e35282560 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:44:31 +0200 Subject: [PATCH 075/113] change lane to string type --- assets/schema_input.json | 5 +- docs/usage.md | 58 +++++++++---------- .../alignment/align_bwa_bwamem2_bwameme.nf | 2 +- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 38249daa..14574166 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -15,8 +15,9 @@ "errorMessage": "Sample name must be provided and cannot contain spaces" }, "lane": { - "type": "number", - "meta": ["lane"] + "type": "string", + "meta": ["lane"], + "pattern": "^\\S+$" }, "fastq_1": { "type": "string", diff --git a/docs/usage.md b/docs/usage.md index 5a110ad1..405c9767 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease 
with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. 
Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -102,17 +102,17 @@ A samplesheet is used to pass the information about the sample(s), such as the p nf-core/raredisease will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The pedigree information in the samplesheet (sex and phenotype) should be provided as they would be for a [ped file](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format) (i.e. 1 for male, 2 for female, other for unknown). -| Fields | Description | -| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `lane` | Used to generate separate channels during the alignment step. | -| `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `sex` | Sex (1=male; 2=female; other=unknown). | -| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | -| `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. 
| -| `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. | -| `case_id` | Case ID, for the analysis used when generating a family VCF. | +| Fields | Description | +| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `lane` | Used to generate separate channels during the alignment step. It is of string type, and we recommend using a combination of flowcell and lane to distinguish between different runs of the same sample. | +| `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `sex` | Sex (1=male; 2=female; other=unknown). | +| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | +| `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. | +| `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. | +| `case_id` | Case ID, for the analysis used when generating a family VCF. | It is also possible to include multiple runs of the same sample in a samplesheet. For example, when you have re-sequenced the same sample more than once to increase sequencing depth. In that case, the `sample` identifiers in the samplesheet have to be the same. 
The pipeline will align the raw read/read-pairs independently before merging the alignments belonging to the same sample. Below is an example for a trio with the proband sequenced across two lanes: diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 15d3db9a..0e10e6d5 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -68,7 +68,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align .map{ meta, bam -> new_id = meta.sample - new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] - meta.subMap('lane') + new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "_" + meta.lane + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] - meta.subMap('lane') [groupKey(new_meta, new_meta.num_lanes), bam] } .groupTuple() From 35104b8af40fc528eb2ea6ba2be3d48c553b95b0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:46:50 +0200 Subject: [PATCH 076/113] lint --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 405c9767..f949a43b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. 
Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From cbcb0550f2d67e0c2482813cbc0f81af93940ab8 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 14:27:35 +0200 Subject: [PATCH 077/113] update usage --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 607aa779..550b0e6e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. 
Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From 75a80652435a207864f38280885403ea8edc0060 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:17:17 +0200 Subject: [PATCH 078/113] update module --- modules.json | 2 +- modules/nf-core/deepvariant/main.nf | 3 + modules/nf-core/deepvariant/meta.yml | 9 ++ .../nf-core/deepvariant/tests/main.nf.test | 48 ++++++++++ .../deepvariant/tests/main.nf.test.snap | 91 ++++++++++++++++++- .../nextflow-non-autosomal-calling.config | 8 ++ 6 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config diff --git a/modules.json b/modules.json index 9fe5ae84..64ceca8d 100644 --- a/modules.json +++ b/modules.json @@ -113,7 +113,7 @@ }, "deepvariant": { "branch": "master", - "git_sha": "ccf06a896339d2a4ed8594daa9f9adb827cb8189", + "git_sha": "a7e8b8afd4fa82f20d745fa778bfdbf39c1f7efb", "installed_by": ["modules"] }, "eklipse": { diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index a560cbe9..8d3d0911 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -12,6 +12,7 @@ process DEEPVARIANT { tuple val(meta2), path(fasta) tuple val(meta3), path(fai) tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) output: tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf @@ -31,6 +32,7 @@ process DEEPVARIANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions = intervals ? "--regions=${intervals}" : "" + def par_regions = par_bed ? 
"--par_regions_bed=${par_bed}" : "" // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755 // FIXME Revert this on next version bump def VERSION = '1.6.1' @@ -43,6 +45,7 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ + ${par_regions} \\ --intermediate_results_dir=tmp \\ --num_shards=${task.cpus} diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index a50dc57d..2327dd5f 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -57,6 +57,15 @@ input: type: file description: GZI index of reference fasta file pattern: "*.gzi" + - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" output: - meta: type: map diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test index 91612c1e..17765233 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test +++ b/modules/nf-core/deepvariant/tests/main.nf.test @@ -31,6 +31,9 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] """ } } @@ -66,6 +69,48 @@ nextflow_process { input[3] = [ [],[] ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") { + config "./nextflow-non-autosomal-calling.config" + tag "test" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] """ } } @@ -102,6 +147,9 @@ nextflow_process { [ id:'genome'], file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) ] + input[4] = [ + [],[] + ] """ } } diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap index c49f7e4d..04f87774 100644 --- a/modules/nf-core/deepvariant/tests/main.nf.test.snap +++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap @@ -265,5 +265,94 @@ "nextflow": "24.04.2" }, "timestamp": "2024-07-01T12:09:13.952808655" + }, + "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "4": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a" + ] + ], + "gvcf_tbi": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c" + ] + ], + "vcf_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3" + ] + ], + "versions": [ + "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T14:29:24.939680679" } -} \ No newline at end of file +} diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config new file mode 100644 index 00000000..4be8986b --- /dev/null +++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config @@ -0,0 +1,8 @@ +process { + + withName: DEEPVARIANT { + ext.args = '--model_type=WGS --haploid_contigs chr22' + ext.prefix = { "${meta.id}_out" } + } + +} From a6b13a1d5fee7fac23fc5ee6722be98bbc860471 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:36:49 +0200 Subject: [PATCH 079/113] update config --- conf/modules/call_snv_deepvariant.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config index 02c2b384..348e95cc 100644 --- a/conf/modules/call_snv_deepvariant.config +++ b/conf/modules/call_snv_deepvariant.config @@ -22,7 +22,10 @@ process { } withName: '.*CALL_SNV_DEEPVARIANT:DEEPVARIANT' { - ext.args = { "--model_type=${params.analysis_type.toUpperCase()}" } + ext.args = { [ + "--model_type=${params.analysis_type.toUpperCase()}", + meta.sex == "1" ? params.genome == 'GRCh37' ? 
'--haploid_contigs="X,Y"' : '--haploid_contigs="chrX,chrY"' : '' + ].join(' ') } ext.prefix = { "${meta.id}_deepvar" } } From c18bd5fe588f7bfb88ca94c840c37fabcf0026a9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:54:58 +0200 Subject: [PATCH 080/113] add option to supply par bed --- main.nf | 1 + nextflow_schema.json | 8 ++++++++ subworkflows/local/call_snv.nf | 2 ++ .../local/variant_calling/call_snv_deepvariant.nf | 3 ++- workflows/raredisease.nf | 3 +++ 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 96fced9c..1e80e917 100644 --- a/main.nf +++ b/main.nf @@ -39,6 +39,7 @@ params.mobile_element_references = getGenomeAttribute('mobile_element_refe params.mobile_element_svdb_annotations = getGenomeAttribute('mobile_element_svdb_annotations') params.ml_model = getGenomeAttribute('ml_model') params.mt_fasta = getGenomeAttribute('mt_fasta') +params.par_bed = getGenomeAttribute('par_bed') params.ploidy_model = getGenomeAttribute('ploidy_model') params.reduced_penetrance = getGenomeAttribute('reduced_penetrance') params.readcount_intervals = getGenomeAttribute('readcount_intervals') diff --git a/nextflow_schema.json b/nextflow_schema.json index f2c79a4f..dc91e7a4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -267,6 +267,14 @@ "description": "Path to mitochondrial FASTA genome file.", "fa_icon": "fas fa-file" }, + "par_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "Path to a BED file containing PAR regions (used by deepvariant)." 
+ }, "ploidy_model": { "type": "string", "exists": true, diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 48bc500a..5d0b2ce9 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -28,6 +28,7 @@ workflow CALL_SNV { ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] ch_call_interval // channel: [mandatory] [ path(intervals) ] ch_ml_model // channel: [mandatory] [ path(model) ] + ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] @@ -50,6 +51,7 @@ workflow CALL_SNV { ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, + ch_par_bed, ch_case_info, ch_foundin_header, ch_genome_chrsizes diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index c3477d67..3104ee67 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT { ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] @@ -28,7 +29,7 @@ workflow CALL_SNV_DEEPVARIANT { } .set { ch_deepvar_in } - DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]] ) + DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed ) DEEPVARIANT.out.gvcf .collect{it[1]} .toList() diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf 
index 864eaf0c..0234272a 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -279,6 +279,8 @@ workflow RAREDISEASE { ch_mtshift_fai = ch_references.mtshift_fai ch_mtshift_fasta = ch_references.mtshift_fasta ch_mtshift_intervals = ch_references.mtshift_intervals + ch_par_bed = params.par_bed ? Channel.fromPath(params.par_bed).map{ it -> [[id:'par_bed'], it] }.collect() + : Channel.value([[],[]]) ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.empty() ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() @@ -502,6 +504,7 @@ workflow RAREDISEASE { ch_dbsnp_tbi, ch_call_interval, ch_ml_model, + ch_par_bed, ch_case_info, ch_foundin_header, Channel.value(params.sentieon_dnascope_pcr_indel_model) From 9ac6ddecfbb9ad6fff034865fcb9cc44c43a962c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:00:29 +0200 Subject: [PATCH 081/113] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc93b90f..75aeb25c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `par_bed` to pass a PAR bed files to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589) - A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) @@ -19,6 +20,7 @@ and this project adheres to 
[Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/587) - Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587) - Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585) @@ -56,6 +58,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | scatter_count | | | vcfanno_extra_resources | +### Tool updates + +| Tool | Old version | New version | +| ----------- | ----------- | ----------- | +| Deepvariant | 1.5.0 | 1.6.1 | + ## 2.1.0 - Obelix [2024-05-29] ### `Added` From f99aa9e10cc5592a63f699aeca1af8e7deaf8451 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:03:22 +0200 Subject: [PATCH 082/113] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc93b90f..72edfc62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/587) +- Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597) +- Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595) - Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587) - Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585) - Skip ROH calling with bcftools if there are no affected samples 
[#579](https://github.com/nf-core/raredisease/pull/579) From 57ce07820d8519da9dfd11e313183768cc8c86cf Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:25:58 +0200 Subject: [PATCH 083/113] update usage --- docs/usage.md | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 3e84295d..25ff6378 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. 
Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) @@ -201,10 +201,12 @@ The mandatory and optional parameters for each category are tabulated below. | ml_model2 | known_dbsnp_tbi2 | | analysis_type3 | call_interval2 | | | known_dbsnp_tbi2 | +| | par_bed4 | 1Default variant caller is DeepVariant, but you have the option to use Sentieon as well.
    2These parameters are only used by Sentieon.
    3Default is WGS, but you have the option to choose WES as well.
    +4This parameter is only used by Deepvariant.
    ##### 5. Variant calling - Structural variants From 24b2b42df590fc3585fa4abe4ca51185d7613c7c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:55:04 +0200 Subject: [PATCH 084/113] lint --- docs/usage.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 6d9af278..4bbff0b5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,24 +10,24 @@ Table of contents: - [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data) - [Updating the pipeline](#updating-the-pipeline) - [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data) - - [Samplesheet](#samplesheet) - - [Reference files and parameters](#reference-files-and-parameters) - - [1. Alignment](#1-alignment) - - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) - - [3. Repeat expansions](#3-repeat-expansions) - - [4. Variant calling - SNV](#4-variant-calling---snv) - - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. Copy number variant calling](#6-copy-number-variant-calling) - - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) - - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) - - [9. Mitochondrial annotation](#9-mitochondrial-annotation) - - [10. Mobile element calling](#10-mobile-element-calling) - - [11. Mobile element annotation](#11-mobile-element-annotation) - - [12. Variant evaluation](#12-variant-evaluation) - - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) - - [Run the pipeline](#run-the-pipeline) - - [Direct input in CLI](#direct-input-in-cli) - - [Import from a config file (recommended)](#import-from-a-config-file-recommended) + - [Samplesheet](#samplesheet) + - [Reference files and parameters](#reference-files-and-parameters) + - [1. 
Alignment](#1-alignment) + - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files) + - [3. Repeat expansions](#3-repeat-expansions) + - [4. Variant calling - SNV](#4-variant-calling---snv) + - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking) + - [8. SV annotation \& Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial annotation](#9-mitochondrial-annotation) + - [10. Mobile element calling](#10-mobile-element-calling) + - [11. Mobile element annotation](#11-mobile-element-annotation) + - [12. Variant evaluation](#12-variant-evaluation) + - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens) + - [Run the pipeline](#run-the-pipeline) + - [Direct input in CLI](#direct-input-in-cli) + - [Import from a config file (recommended)](#import-from-a-config-file-recommended) - [Best practices](#best-practices) - [Core Nextflow arguments](#core-nextflow-arguments) - [`-profile`](#-profile) From 06a136f7e4ee759fb83348f867b995058d12801a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:37:27 +0200 Subject: [PATCH 085/113] revert some changes --- CHANGELOG.md | 2 ++ config.config | 3 +++ nextflow.config | 1 + nextflow_schema.json | 7 +++++++ subworkflows/local/align.nf | 10 ++-------- subworkflows/local/alignment/align_MT.nf | 14 +++----------- 6 files changed, 18 insertions(+), 19 deletions(-) create mode 100644 config.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 75f0eee0..ce06d8f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new parameter `mt_aligner` to control which aligner is used to align reads to mitochondria - A new parameter `par_bed` to pass 
a PAR bed files to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589) - A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) @@ -46,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | --------------- | ------------------------ | | | mbuffer_mem | +| | mt_aligner | | | samtools_sort_threads | | | skip_repeat_calling | | | skip_snv_calling | diff --git a/config.config b/config.config new file mode 100644 index 00000000..bcd0c0ba --- /dev/null +++ b/config.config @@ -0,0 +1,3 @@ +singularity { + runOptions = "--bind /home/ramprasad.neethiraj/tmp:/home/ramprasad.neethiraj/tmp" +} diff --git a/nextflow.config b/nextflow.config index b7857df5..db7a3967 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,6 +53,7 @@ params { // Alignment aligner = 'bwamem2' + mt_aligner = 'bwamem2' mbuffer_mem = 3072 samtools_sort_threads = 4 min_trimmed_length = 40 diff --git a/nextflow_schema.json b/nextflow_schema.json index dc91e7a4..bdfcb70b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -632,6 +632,13 @@ "fa_icon": "fas fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, + "mt_aligner": { + "type": "string", + "default": "bwamem2", + "description": "Specifies the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.", + "fa_icon": "fas fa-align-center", + "enum": ["bwa", "bwamem2", "sentieon"] + }, "samtools_sort_threads": { "type": "integer", "default": 4, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index b2f91d27..5db1a573 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -97,12 +97,9 @@ workflow ALIGN { CONVERT_MT_BAM_TO_FASTQ.out.bam, ch_genome_bwaindex, 
ch_genome_bwamem2index, - ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_dictionary, - ch_genome_fai, - val_mbuffer_mem, - val_sort_threads + ch_genome_fai ) ALIGN_MT_SHIFT ( @@ -110,12 +107,9 @@ workflow ALIGN { CONVERT_MT_BAM_TO_FASTQ.out.bam, ch_mtshift_bwaindex, ch_mtshift_bwamem2index, - ch_mtshift_bwamemeindex, ch_mtshift_fasta, ch_mtshift_dictionary, - ch_mtshift_fai, - val_mbuffer_mem, - val_sort_threads + ch_mtshift_fai ) ch_mt_marked_bam = ALIGN_MT.out.marked_bam diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf index 14c65526..7a0c1d2c 100644 --- a/subworkflows/local/alignment/align_MT.nf +++ b/subworkflows/local/alignment/align_MT.nf @@ -5,7 +5,6 @@ include { BWA_MEM as BWA_MEM_MT } from '../../../modules/nf-core/bwa/mem/main' include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' -include { BWAMEME_MEM as BWAMEME_MEM_MT } from '../../../modules/nf-core/bwameme/mem/main' include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' @@ -18,32 +17,25 @@ workflow ALIGN_MT { ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_bwamemeindex // channel: [mandatory for bwameme] [ val(meta), path(index) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_dict // channel: [mandatory] [ val(meta), path(dict) ] ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - 
val_mbuffer_mem // integer: [mandatory] default: 3072 - val_sort_threads // integer: [mandatory] default: 4 main: ch_versions = Channel.empty() - if (params.aligner.equals("bwamem2")) { + if (params.mt_aligner.equals("bwamem2")) { BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, ch_fasta, true) ch_align = BWAMEM2_MEM_MT.out.bam ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) - } else if (params.aligner.equals("sentieon")) { + } else if (params.mt_aligner.equals("sentieon")) { SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai ) ch_align = SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] } ch_versions = ch_versions.mix(SENTIEON_BWAMEM_MT.out.versions.first()) - } else if (params.aligner.equals("bwa")) { + } else if (params.mt_aligner.equals("bwa")) { BWA_MEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, true ) ch_align = BWA_MEM_MT.out.bam ch_versions = ch_versions.mix(BWA_MEM_MT.out.versions.first()) - } else if (params.aligner.equals("bwameme")) { - BWAMEME_MEM_MT (ch_fastq, ch_bwamemeindex, ch_fasta, true, val_mbuffer_mem, val_sort_threads) - ch_align = BWAMEME_MEM_MT.out.bam - ch_versions = ch_versions.mix(BWAMEME_MEM_MT.out.versions.first()) } ch_align .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) From 0c8af9aab768a328f576127e3528bbdfae532ffb Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:39:17 +0200 Subject: [PATCH 086/113] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce06d8f0..6cb215dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- A new parameter `mt_aligner` to control which aligner is used to align reads to mitochondria +- A new parameter `mt_aligner` to control which aligner is used to align reads to mitochondria 
[#600](https://github.com/nf-core/raredisease/pull/600) - A new parameter `par_bed` to pass a PAR bed files to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589) - A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588) From ceb1730027473a426f82a7a4d3f9a7b1b51ce1fe Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:42:57 +0200 Subject: [PATCH 087/113] remove config.config --- config.config | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 config.config diff --git a/config.config b/config.config deleted file mode 100644 index bcd0c0ba..00000000 --- a/config.config +++ /dev/null @@ -1,3 +0,0 @@ -singularity { - runOptions = "--bind /home/ramprasad.neethiraj/tmp:/home/ramprasad.neethiraj/tmp" -} From af3355227b1be890e2a2a731d562b028388831d7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:19:14 +0200 Subject: [PATCH 088/113] review suggestions --- CHANGELOG.md | 1 + conf/modules/align_MT.config | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cb215dd..1015ebb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- bwameme can no longer be used to align mitochondrial reads [#600](https://github.com/nf-core/raredisease/pull/600) - Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597) - Allow `0` as a 
valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595) diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config index a616272b..3fdbdbaa 100644 --- a/conf/modules/align_MT.config +++ b/conf/modules/align_MT.config @@ -23,12 +23,6 @@ process { ext.prefix = { "${meta.id}_sorted" } } - withName: '.*ALIGN_MT:BWAMEME_MEM_MT' { - ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } - ext.args2 = { "-T ./samtools_sort_tmp" } - ext.prefix = { "${meta.id}_sorted" } - } - withName: '.*ALIGN_MT:BWA_MEM_MT' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } ext.args2 = { "-T ./samtools_sort_tmp" } @@ -80,12 +74,6 @@ process { ext.prefix = { "${meta.id}_sorted_shifted" } } - withName: '.*ALIGN_MT_SHIFT:BWAMEME_MEM_MT' { - ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } - ext.args2 = { "-T ./samtools_sort_tmp" } - ext.prefix = { "${meta.id}_sorted_shifted" } - } - withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } ext.args2 = { "-T ./samtools_sort_tmp" } From 58cf12645f142ed68db3a060e85aa1d3a16f965b Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 20 Aug 2024 10:25:37 +0000 Subject: [PATCH 089/113] Updating Sentieon DNAscope ml-model to v1.1 in test-configs --- conf/test.config | 2 +- conf/test_one_sample.config | 2 +- conf/test_sentieon.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 439d4875..f0c503c9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -40,7 +40,7 @@ params { intervals_wgs = params.pipelines_testdata_base_path + 'raredisease/reference/target_wgs.interval_list' intervals_y = params.pipelines_testdata_base_path + 'raredisease/reference/targetY.interval_list' known_dbsnp = params.pipelines_testdata_base_path + 'raredisease/reference/dbsnp_-138-.vcf.gz' - ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model' + ml_model = 
'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model' mobile_element_references = params.pipelines_testdata_base_path + 'raredisease/reference/mobile_element_references.tsv' mobile_element_svdb_annotations = params.pipelines_testdata_base_path + 'raredisease/reference/svdb_querydb_files.csv' reduced_penetrance = params.pipelines_testdata_base_path + 'raredisease/reference/reduced_penetrance.tsv' diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index fc9b24cc..1aad988b 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -40,7 +40,7 @@ params { intervals_wgs = params.pipelines_testdata_base_path + 'raredisease/reference/target_wgs.interval_list' intervals_y = params.pipelines_testdata_base_path + 'raredisease/reference/targetY.interval_list' known_dbsnp = params.pipelines_testdata_base_path + 'raredisease/reference/dbsnp_-138-.vcf.gz' - ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model' + ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model' mobile_element_references = params.pipelines_testdata_base_path + 'raredisease/reference/mobile_element_references.tsv' mobile_element_svdb_annotations = params.pipelines_testdata_base_path + 'raredisease/reference/svdb_querydb_files.csv' reduced_penetrance = params.pipelines_testdata_base_path + 'raredisease/reference/reduced_penetrance.tsv' diff --git a/conf/test_sentieon.config b/conf/test_sentieon.config index b440d47d..6776eb6a 100644 --- a/conf/test_sentieon.config +++ b/conf/test_sentieon.config @@ -38,7 +38,7 @@ params { intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" - 
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" + ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model" mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" mobile_element_svdb_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" From fbf2d4f693e47c993308f63c474a1ac8ac98a64f Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 20 Aug 2024 11:59:59 +0000 Subject: [PATCH 090/113] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1015ebb4..afc1ba15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Updated the model for Sentieon DNAScope to v1.1 [#601](https://github.com/nf-core/raredisease/pull/601) - bwameme can no longer be used to align mitochondrial reads [#600](https://github.com/nf-core/raredisease/pull/600) - Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) - Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597) From 97c0ef66600ded53ca465f4c0dd274d01b9ca240 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 23 Aug 2024 14:34:11 +0200 Subject: [PATCH 091/113] first attempt --- subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf | 2 +- subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 0e10e6d5..15d3db9a 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -68,7 +68,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME { ch_align .map{ meta, bam -> new_id = meta.sample - new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "_" + meta.lane + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] - meta.subMap('lane') + new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] - meta.subMap('lane') [groupKey(new_meta, new_meta.num_lanes), bam] } .groupTuple() diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index dd3e1812..bafaf7ce 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -90,7 +90,7 @@ workflow PIPELINE_INITIALISATION { .combine( ch_original_input ) .map { counts, meta, fastq1, fastq2 -> new_meta = meta + [num_lanes:counts[meta.id], - read_group:"\'@RG\\tID:"+ fastq1.toString().split('/')[-1] + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'"] + read_group:"\'@RG\\tID:"+ fastq1.simpleName + "_" + meta.lane + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'"] if (!fastq2) { return [ new_meta + [ single_end:true ], [ fastq1 ] ] } else { From f27d33e8aeb7f785eb1f2cf5270c14ac04370166 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 26 Aug 2024 13:11:52 +0200 Subject: [PATCH 092/113] properly remove references to bwameme in mt subworkflows --- conf/modules/prepare_references.config | 11 +++-------- subworkflows/local/align.nf | 1 - subworkflows/local/prepare_references.nf | 4 ---- workflows/raredisease.nf 
| 2 -- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 10fda4a3..6240f2cb 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -35,12 +35,7 @@ process { } withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { - ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "bwamem2"} - } - - withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_MT_SHIFT' { - ext.args = '-a meme' - ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwameme"} + ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { @@ -48,11 +43,11 @@ process { } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' { - ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "sentieon"} + ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"} } withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' { - ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "bwa"} + ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"} } withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 5db1a573..29c7f5da 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -21,7 +21,6 @@ workflow ALIGN { ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ] ch_mtshift_fasta // channel: [mandatory] [ 
val(meta), path(fasta) ] ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 4484bcd0..73301f53 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -7,7 +7,6 @@ include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modul include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main' include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main' -include { BWAMEME_INDEX as BWAMEME_INDEX_MT_SHIFT } from '../../modules/nf-core/bwameme/index/main' include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main' include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main' @@ -73,7 +72,6 @@ workflow PREPARE_REFERENCES { // MT alignment indices BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) - BWAMEME_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) ch_bwa_mtshift = Channel.empty().mix(SENTIEON_BWAINDEX_MT_SHIFT.out.index, BWA_INDEX_MT_SHIFT.out.index).collect() @@ -143,7 +141,6 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions) - ch_versions = ch_versions.mix(BWAMEME_INDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions) ch_versions = 
ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions) @@ -175,7 +172,6 @@ workflow PREPARE_REFERENCES { mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ] mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - mtshift_bwameme_index = BWAMEME_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 0234272a..10ef83b1 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -274,7 +274,6 @@ workflow RAREDISEASE { ch_mtshift_backchain = ch_references.mtshift_backchain ch_mtshift_bwaindex = ch_references.mtshift_bwa_index ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index - ch_mtshift_bwamemeindex = ch_references.mtshift_bwameme_index ch_mtshift_dictionary = ch_references.mtshift_dict ch_mtshift_fai = ch_references.mtshift_fai ch_mtshift_fasta = ch_references.mtshift_fasta @@ -415,7 +414,6 @@ workflow RAREDISEASE { ch_genome_dictionary, ch_mtshift_bwaindex, ch_mtshift_bwamem2index, - ch_mtshift_bwamemeindex, ch_mtshift_fasta, ch_mtshift_dictionary, ch_mtshift_fai, From 818f7af84c1868440162a6efa1284d8af2ca9405 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 27 Aug 2024 09:42:35 +0200 Subject: [PATCH 093/113] issue when aligner and mt_aligner has different values --- conf/modules/prepare_references.config | 16 ++++++++-------- workflows/raredisease.nf | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 
6240f2cb..25207856 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -26,7 +26,7 @@ process { } withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_GENOME' { - ext.when = {!params.bwamem2 && params.aligner == "bwamem2"} + ext.when = {!params.bwamem2 && (params.aligner == "bwamem2" || params.mt_aligner == "bwamem2")} } withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_GENOME' { @@ -34,12 +34,16 @@ process { ext.when = {!params.bwameme && params.aligner == "bwameme"} } - withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { - ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"} + withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { + ext.when = {!params.bwa && (!(params.aligner == "sentieon") || params.mt_aligner == "bwa")} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { - ext.when = {!params.bwa && params.aligner == "sentieon"} + ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")} + } + + withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { + ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' { @@ -50,10 +54,6 @@ process { ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"} } - withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { - ext.when = {!params.bwa && (!(params.aligner == "sentieon") || params.aligner == "bwa")} - } - withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' { ext.when = {!params.fai} } diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 10ef83b1..88425b42 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -191,7 +191,7 @@ workflow RAREDISEASE { ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() ch_genome_fai = params.fai ? 
Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + ch_genome_dict_unprocessed = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.value([[],[]]) @@ -212,7 +212,7 @@ workflow RAREDISEASE { PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, - ch_genome_dictionary, + ch_genome_dict_unprocessed, ch_mt_fasta, ch_gnomad_af_tab, ch_dbsnp, From 21becd6b820152cd8f4be8e3feab00f3e57e6659 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:12:43 +0200 Subject: [PATCH 094/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index afc1ba15..5f4adb3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Issues that cropped up when `aligner` and `mt_aligner` were different [#605](https://github.com/nf-core/raredisease/pull/605) - Update docs to show 'vep_plugin_files' as a mandatory parameter for SNV annotation [#594](https://github.com/nf-core/raredisease/issues/593) - Error in SVDB merge when only a single SV caller is run [#586](https://github.com/nf-core/raredisease/pull/586) - Errors due to misplaced version statements [#578](https://github.com/nf-core/raredisease/pull/578) From ca688e5d174c4878b967f4f8011df87fd900ccd5 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:52:41 +0200 Subject: [PATCH 095/113] Update raredisease.nf ---
workflows/raredisease.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 88425b42..10ef83b1 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -191,7 +191,7 @@ workflow RAREDISEASE { ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() - ch_genome_dict_unprocessed = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() : Channel.empty() ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() : Channel.value([[],[]]) @@ -212,7 +212,7 @@ workflow RAREDISEASE { PREPARE_REFERENCES ( ch_genome_fasta, ch_genome_fai, - ch_genome_dict_unprocessed, + ch_genome_dictionary, ch_mt_fasta, ch_gnomad_af_tab, ch_dbsnp, From 67d4607f4aa6aafd5cd9a268e1b68a3180abea27 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:35:08 +0200 Subject: [PATCH 096/113] update cytosure conditional --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 10ef83b1..8dbfae62 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -785,7 +785,7 @@ workflow RAREDISEASE { Generate CGH files from sequencing data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") ) { + if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") && !params.skip_sv_calling) { 
GENERATE_CYTOSURE_FILES ( ch_sv_annotate.vcf_ann, ch_sv_annotate.tbi, From 40178f06204a6855446bde6717d727ae74709cbd Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:45:33 +0200 Subject: [PATCH 097/113] add csi option --- conf/modules/prepare_references.config | 4 ++++ subworkflows/local/prepare_references.nf | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 25207856..401513ab 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -100,6 +100,10 @@ process { ext.when = { !params.target_bed.equals(null) && !params.target_bed.endsWith(".gz") } } + withName: '.*PREPARE_REFERENCES:TABIX_BGZIPINDEX_VCFANNOEXTRA' { + ext.args2 = '-C' + } + withName: '.*PREPARE_REFERENCES:GATK_BILT' { ext.when = { !params.target_bed.equals(null) } ext.prefix = { "${meta.id}_target" } diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 73301f53..8c39c614 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -103,8 +103,10 @@ workflow PREPARE_REFERENCES { .map { meta, tbi, vcf -> return [[vcf,tbi]]} .set {ch_vcfanno_index} - TABIX_BGZIPINDEX_VCFANNOEXTRA(ch_vcfanno_tabix_in.bgzipindex).gz_tbi - .map { meta, vcf, tbi -> return [[vcf,tbi]] } + TABIX_BGZIPINDEX_VCFANNOEXTRA(ch_vcfanno_tabix_in.bgzipindex) + Channel.empty() + .mix(TABIX_BGZIPINDEX_VCFANNOEXTRA.out.gz_tbi, TABIX_BGZIPINDEX_VCFANNOEXTRA.out.gz_csi) + .map { meta, vcf, index -> return [[vcf,index]] } .set {ch_vcfanno_bgzip} Channel.empty() From 6b7a8b3cea599e3ee03bb1acf63a7602c3461f38 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:49:15 +0200 Subject: [PATCH 098/113] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 
insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4adb3f..672c689b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Default index for vcfanno extra annotation files from tbi to csi [#606](https://github.com/nf-core/raredisease/pull/606) - Updated the model for Sentieon DNAScope to v1.1 [#601](https://github.com/nf-core/raredisease/pull/601) - bwameme can no longer be used to align mitochondrial reads [#600](https://github.com/nf-core/raredisease/pull/600) - Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598) From 68133d62ded606b869e96b1a74ab92ab398082a7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:35:05 +0200 Subject: [PATCH 099/113] tiny fix --- nextflow_schema.json | 9 ++++----- workflows/raredisease.nf | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index bdfcb70b..bf20ae91 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -485,9 +485,9 @@ "analysis_type": { "type": "string", "default": "wgs", - "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. This changes resources consumed and tools used.", + "description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'. This changes resources consumed and tools used.", "fa_icon": "fas fa-align-center", - "enum": ["wgs", "wes", "mito"] + "enum": ["wgs", "wes"] }, "bwa_as_fallback": { "type": "boolean", @@ -498,9 +498,8 @@ "platform": { "type": "string", "default": "illumina", - "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. 
This changes resources consumed and tools used.", - "fa_icon": "fas fa-align-center", - "enum": ["illumina"] + "description": "Specifies the platform on which the reads were sequenced.", + "fa_icon": "fas fa-align-center" }, "ngsbits_samplegender_method": { "type": "string", diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 8dbfae62..80e51572 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -785,7 +785,7 @@ workflow RAREDISEASE { Generate CGH files from sequencing data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") && !params.skip_sv_calling) { + if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") && !params.skip_sv_calling && !params.skip_sv_annotation) { GENERATE_CYTOSURE_FILES ( ch_sv_annotate.vcf_ann, ch_sv_annotate.tbi, From 7354184051881d80c3faca5e29398756d7bd873d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:06:39 +0200 Subject: [PATCH 100/113] bump version --- CHANGELOG.md | 2 +- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 672c689b..39309232 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## 2.2.0dev - Dogmatix [XXXX-XX-XX] +## 2.2.0 - Dogmatix [2024-09-13] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b8bb11a1..964d29bf 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/ custom_logo_title: "nf-core/raredisease" report_comment: > - This report has been generated by the nf-core/raredisease + This report has been generated by the nf-core/raredisease analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-raredisease-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index db7a3967..b6a62bee 100644 --- a/nextflow.config +++ b/nextflow.config @@ -307,7 +307,7 @@ manifest { description = """call and score variants from WGS/WES of rare disease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.2.0dev' + version = '2.2.0' doi = '' } From eb0fde7039adde9a2aa8f8ba925a582a1bd386c1 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:44:34 +0200 Subject: [PATCH 101/113] vep110-112 --- modules.json | 4 +- .../ensemblvep/filtervep/environment.yml | 4 +- modules/nf-core/ensemblvep/filtervep/main.nf | 5 +- .../ensemblvep/filtervep/tests/main.nf.test | 136 ++++++++++++++++++ .../filtervep/tests/main.nf.test.snap | 26 ++++ .../filtervep/tests/nextflow.config | 10 ++ .../ensemblvep/filtervep/tests/tab.gz.config | 24 ++++ .../ensemblvep/filtervep/tests/tags.yml | 2 + .../ensemblvep/filtervep/tests/vcf.config | 23 +++ .../nf-core/ensemblvep/vep/environment.yml | 4 +- modules/nf-core/ensemblvep/vep/main.nf | 14 +- .../nf-core/ensemblvep/vep/tests/main.nf.test | 40 ++++-- .../ensemblvep/vep/tests/main.nf.test.snap | 26 ++++ .../ensemblvep/vep/tests/nextflow.config | 11 +- 14 files changed, 291 insertions(+), 38 deletions(-) 
create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/main.nf.test create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/nextflow.config create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/tags.yml create mode 100644 modules/nf-core/ensemblvep/filtervep/tests/vcf.config create mode 100644 modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 64ceca8d..cc525b80 100644 --- a/modules.json +++ b/modules.json @@ -123,12 +123,12 @@ }, "ensemblvep/filtervep": { "branch": "master", - "git_sha": "214d575774c172062924ad3564b4f66655600730", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "ensemblvep/vep": { "branch": "master", - "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "expansionhunter": { diff --git a/modules/nf-core/ensemblvep/filtervep/environment.yml b/modules/nf-core/ensemblvep/filtervep/environment.yml index d84dc89e..283a45bb 100644 --- a/modules/nf-core/ensemblvep/filtervep/environment.yml +++ b/modules/nf-core/ensemblvep/filtervep/environment.yml @@ -1,7 +1,5 @@ -name: ensemblvep_filtervep channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::ensembl-vep=110.0 + - bioconda::ensembl-vep=112.0 diff --git a/modules/nf-core/ensemblvep/filtervep/main.nf b/modules/nf-core/ensemblvep/filtervep/main.nf index 53abf772..a56bdb83 100644 --- a/modules/nf-core/ensemblvep/filtervep/main.nf +++ b/modules/nf-core/ensemblvep/filtervep/main.nf @@ -4,8 +4,8 @@ process ENSEMBLVEP_FILTERVEP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : - 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + 'https://depot.galaxyproject.org/singularity/ensembl-vep:112.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:112.0--pl5321h2a3209d_0' }" input: tuple val(meta), path(input) @@ -47,4 +47,3 @@ process ENSEMBLVEP_FILTERVEP { END_VERSIONS """ } - diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test new file mode 100644 index 00000000..1852e2ab --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test @@ -0,0 +1,136 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_FILTERVEP" + script "../main.nf" + process "ENSEMBLVEP_FILTERVEP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/vep" + tag "ensemblvep/filtervep" + tag "ensemblvep/download" + + // Test for filtering VCF file + test("test_ensemblvep_filtervep_vcf") { + config "./vcf.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"112_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + run("ENSEMBLVEP_VEP") { + script "../../vep/main.nf" + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + } + + when { + process { + """ + input[0] = ENSEMBLVEP_VEP.out.vcf + input[1] = [] + """ + } + } + + then 
{ + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.output.get(0).get(1)).readLines().first().contains("##fileformat=VCFv4.2") } + ) + } + } + + // Test for filtering TAB file + test("test_ensemblvep_filtervep_tab_gz") { + config "./tab.gz.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"112_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + run("ENSEMBLVEP_VEP") { + script "../../vep/main.nf" + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + } + + when { + process { + """ + input[0] = ENSEMBLVEP_VEP.out.tab + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.output.get(0).get(1)).readLines().first().contains("## ENSEMBL VARIANT EFFECT PREDICTOR v112.0") } + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap new file mode 100644 index 00000000..ddaa1dc1 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_ensemblvep_filtervep_vcf": { + "content": [ + [ + "versions.yml:md5,4d3217834548bbe6784e102e9348461d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-09-02T13:38:25.18143" + }, + "test_ensemblvep_filtervep_tab_gz": { + "content": [ + [ + "versions.yml:md5,4d3217834548bbe6784e102e9348461d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T13:57:11.471669" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config b/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config new file mode 100644 index 00000000..aee2e62b --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config @@ -0,0 +1,10 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ +params { + vep_cache_version = "112" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config b/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config new file mode 100644 index 00000000..0aa5ea75 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config @@ -0,0 +1,24 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + } + + withName: ENSEMBLVEP_VEP { + ext.args = '--tab' + ext.prefix = { "${meta.id}_vep" } + } + + withName: ENSEMBLVEP_FILTERVEP { + ext.args = '--filter "Feature_type is Transcript"' + ext.suffix = "tab" + } +} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/tags.yml b/modules/nf-core/ensemblvep/filtervep/tests/tags.yml new file mode 100644 
index 00000000..b43bf40d --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/filtervep: + - "modules/nf-core/ensemblvep/filtervep/**" diff --git a/modules/nf-core/ensemblvep/filtervep/tests/vcf.config b/modules/nf-core/ensemblvep/filtervep/tests/vcf.config new file mode 100644 index 00000000..0b65fe5c --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/tests/vcf.config @@ -0,0 +1,23 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + } + + withName: ENSEMBLVEP_VEP { + ext.args = '--vcf' + ext.prefix = { "${meta.id}_vep" } + } + + withName: ENSEMBLVEP_FILTERVEP { + ext.args = '--filter "Feature_type is Transcript"' + } +} diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml index 7a127746..283a45bb 100644 --- a/modules/nf-core/ensemblvep/vep/environment.yml +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -1,7 +1,5 @@ -name: ensemblvep_vep channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::ensembl-vep=110.0 + - bioconda::ensembl-vep=112.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index a7fc5ad1..5f33265b 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -4,8 +4,8 @@ process ENSEMBLVEP_VEP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : - 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + 'https://depot.galaxyproject.org/singularity/ensembl-vep:112.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:112.0--pl5321h2a3209d_0' }" input: tuple val(meta), path(vcf), path(custom_extra_files) @@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP { tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf tuple val(meta), path("*.tab.gz") , optional:true, emit: tab tuple val(meta), path("*.json.gz") , optional:true, emit: json - path "*.summary.html" , optional:true, emit: report + path "*.html" , optional:true, emit: report path "versions.yml" , emit: versions when: @@ -57,10 +57,10 @@ process ENSEMBLVEP_VEP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf.gz - touch ${prefix}.tab.gz - touch ${prefix}.json.gz - touch ${prefix}.summary.html + echo "" | gzip > ${prefix}.vcf.gz + echo "" | gzip > ${prefix}.tab.gz + echo "" | gzip > ${prefix}.json.gz + touch ${prefix}_summary.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test index f072dcab..e68fff3c 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -1,26 +1,31 @@ nextflow_process { name "Test Process ENSEMBLVEP_VEP" - script "modules/nf-core/ensemblvep/vep/main.nf" + script "../main.nf" process "ENSEMBLVEP_VEP" config "./nextflow.config" + tag "modules" tag "modules_nfcore" tag "ensemblvep" tag "ensemblvep/vep" tag "ensemblvep/download" - test("test_ensemblvep_vep_fasta_vcf") { - config "./vcf.config" setup { run("ENSEMBLVEP_DOWNLOAD") { script "../../download/main.nf" + process { """ - input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + input[0] = Channel.of([ 
+ [id:"112_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) """ } } @@ -31,7 +36,7 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] ]) input[1] = params.vep_genome @@ -40,7 +45,7 @@ nextflow_process { input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } input[5] = Channel.value([ [id:"fasta"], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) input[6] = [] """ @@ -49,23 +54,29 @@ nextflow_process { then { assertAll( - {assert process.success}, - {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } ) } } test("test_ensemblvep_vep_fasta_tab_gz") { - config "./tab.gz.config" setup { run("ENSEMBLVEP_DOWNLOAD") { script "../../download/main.nf" + process { """ - input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + input[0] = Channel.of([ + [id:"112_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) """ } } @@ -76,7 +87,7 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), [] ]) input[1] = params.vep_genome @@ -85,7 +96,7 @@ nextflow_process { input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } 
input[5] = Channel.value([ [id:"fasta"], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) input[6] = [] """ @@ -94,8 +105,9 @@ nextflow_process { then { assertAll( - {assert process.success}, - {assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0")} + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v112.0") } ) } } diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap new file mode 100644 index 00000000..1c4c0e4e --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_ensemblvep_vep_fasta_tab_gz": { + "content": [ + [ + "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T10:15:18.228927" + }, + "test_ensemblvep_vep_fasta_vcf": { + "content": [ + [ + "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T10:14:50.193861" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config index cfaef733..9aa48164 100644 --- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -1,13 +1,12 @@ params { - vep_cache_version = "110" - vep_genome = "WBcel235" - vep_species = "caenorhabditis_elegans" + vep_cache_version = "112" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" } process { - withName: ENSEMBLVEP_DOWNLOAD { - ext.args = '--AUTO c --CONVERT --NO_BIOPERL 
--NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } } - } From 0f41135bb9d6df32b46640d92eb27e544c3e0f16 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:46:21 +0200 Subject: [PATCH 102/113] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39309232..79c1b7ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- Update default vep container from v110 to v112 [#609](https://github.com/nf-core/raredisease/pull/609) - Default index for vcfanno extra annotation files from tbi to csi [#606](https://github.com/nf-core/raredisease/pull/606) - Updated the model for Sentieon DNAScope to v1.1 [#601](https://github.com/nf-core/raredisease/pull/601) - bwameme can no longer be used to align mitochondrial reads [#600](https://github.com/nf-core/raredisease/pull/600) @@ -70,6 +71,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Tool | Old version | New version | | ----------- | ----------- | ----------- | | Deepvariant | 1.5.0 | 1.6.1 | +| ensemblvep | 110 | 112 | ## 2.1.0 - Obelix [2024-05-29] From ef73852f01b44edf67b275e6e80f7bd8e0462f36 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:34:14 +0200 Subject: [PATCH 103/113] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79c1b7ca..0bf1e15b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - Issues that cropped up when `aligner` and `mt_aligner` were different 
[#605](https://github.com/nf-core/raredisease/pull/605) -- Update docs to show 'vep_plugin_files' as a mandatory parameter for SNV annotation [#594](https://github.com/nf-core/raredisease/issues/593) +- Update docs to show 'vep_plugin_files' as a mandatory parameter for SNV annotation [#594](https://github.com/nf-core/raredisease/pull/594) - Error in SVDB merge when only a single SV caller is run [#586](https://github.com/nf-core/raredisease/pull/586) - Errors due to misplaced version statements [#578](https://github.com/nf-core/raredisease/pull/578) - Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576) From d540dc7359e90b8a983fe12e328c0a436b0ffe7a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:34:34 +0200 Subject: [PATCH 104/113] Apply suggestions from code review Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bf1e15b..dddb1aa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585) - Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579) - Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574) -- A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module[#574](https://github.com/nf-core/raredisease/pull/574) +- A new parameter 
`skip_smncopynumbercaller` to skip smncopynumbercaller module [#574](https://github.com/nf-core/raredisease/pull/574) - A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572) - Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571) - Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570) From 8efb0d3d47b52811018fbf3931d821ed35692501 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:34:54 +0200 Subject: [PATCH 105/113] Update conf/modules/prepare_references.config Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- conf/modules/prepare_references.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 401513ab..025172fe 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -101,7 +101,7 @@ process { } withName: '.*PREPARE_REFERENCES:TABIX_BGZIPINDEX_VCFANNOEXTRA' { - ext.args2 = '-C' + ext.args2 = '--csi' } withName: '.*PREPARE_REFERENCES:GATK_BILT' { From 8d7d36ea147e7849f0f9cc9c0bbaf6af94d63f8a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:37:18 +0200 Subject: [PATCH 106/113] Update nextflow_schema.json --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index bf20ae91..3ccdc1f6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -627,14 +627,14 @@ "aligner": { "type": "string", "default": "bwamem2", - "description": "Specifies 
the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.", + "description": "Specifies the alignment algorithm to use - available options are 'bwamem2', 'bwa', 'bwameme' and 'sentieon'.", "fa_icon": "fas fa-align-center", "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, "mt_aligner": { "type": "string", "default": "bwamem2", - "description": "Specifies the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.", + "description": "Specifies the alignment algorithm to use - available options are 'bwamem2', 'bwa' and 'sentieon'.", "fa_icon": "fas fa-align-center", "enum": ["bwa", "bwamem2", "sentieon"] }, From 56b6b48feda1d31bd6c7dc61ab3488eb5304ba49 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:38:01 +0200 Subject: [PATCH 107/113] Update docs/usage.md Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 4bbff0b5..21e16565 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,7 +108,7 @@ nf-core/raredisease will auto-detect whether a sample is single- or paired-end u | `lane` | Used to generate separate channels during the alignment step. It is of string type, and we recommend using a combination of flowcell and lane to distinguish between different runs of the same sample. | | `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or other). | +| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or 'other'). | | `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). 
| | `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. | | `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. | From dc568ff3f706528a3425248df4e74ff815007973 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:38:08 +0200 Subject: [PATCH 108/113] Update docs/output.md Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 508b115d..d7f608b7 100644 --- a/docs/output.md +++ b/docs/output.md @@ -101,7 +101,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ##### Picard's MarkDuplicates -[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 bwameme and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true. +[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2, bwameme and bwa are processed by this tool. By default, alignment files are published in bam format. 
If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
    Output files from Alignment From 44385373246a674005f81a0faf3df5030abf33f0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:38:18 +0200 Subject: [PATCH 109/113] Update subworkflows/local/align.nf Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --- subworkflows/local/align.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 29c7f5da..b332c84c 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -26,7 +26,7 @@ workflow ALIGN { ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] val_mbuffer_mem // integer: [mandatory] memory in megabytes val_platform // string: [mandatory] illumina or a different technology - val_sort_threads // integer: [mandatory] memory in megabytes + val_sort_threads // integer: [mandatory] number of sorting threads main: ch_bwamem2_bam = Channel.empty() From 7cec124e35a75e536846c55393300077d333df48 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:39:52 +0200 Subject: [PATCH 110/113] Update annotate_genome_snvs.nf --- subworkflows/local/annotate_genome_snvs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf index 51d5a64c..396e2614 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -29,7 +29,7 @@ workflow ANNOTATE_GENOME_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ] - ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),...] 
] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf1),path(index1),...,path(vcfn),path(indexn)] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 From 36ab790f8d110584ea7ea2255576b276a782b1a0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:43:28 +0200 Subject: [PATCH 111/113] Update annotate_mt_snvs.nf --- subworkflows/local/annotate_mt_snvs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index bb5dca6b..262e72c4 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -21,7 +21,7 @@ workflow ANNOTATE_MT_SNVS { ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index).....] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] - ch_vcfanno_resources // channel: [mandatory] [ [path(vcf),path(index),.....] 
] + ch_vcfanno_resources // channel: [mandatory] [ [path(vcf1),path(index1),...,path(vcfn),path(indexn)] ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] 107 From fbbd1eacf518e5bf38ae6700ffbb82351255e54d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:44:12 +0200 Subject: [PATCH 112/113] Update annotate_mt_snvs.nf --- subworkflows/local/annotate_mt_snvs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index 262e72c4..d2a82879 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -19,7 +19,7 @@ workflow ANNOTATE_MT_SNVS { ch_cadd_header // channel: [mandatory] [ path(txt) ] ch_cadd_resources // channel: [mandatory] [ path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index).....] ] + ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] ch_vcfanno_resources // channel: [mandatory] [ [path(vcf1),path(index1),...,path(vcfn),path(indexn)] ] ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] From fb77d3f84bb6b0d1440635fc268f03a696f2d45e Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:47:24 +0200 Subject: [PATCH 113/113] fix lint --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 21e16565..0304c46a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,7 +108,7 @@ nf-core/raredisease will auto-detect whether a sample is single- or paired-end u | `lane` | Used to generate separate channels during the alignment step. 
It is of string type, and we recommend using a combination of flowcell and lane to distinguish between different runs of the same sample. | | `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or 'other'). | +| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or 'other'). | | `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). | | `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. | | `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. |