Skip to content

Commit

Permalink
Merge pull request #608 from nf-core/mito
Browse files Browse the repository at this point in the history
Add option to analyse only mitochondria
  • Loading branch information
ramprasadn authored Sep 24, 2024
2 parents 695ccc3 + 877cb31 commit c3aacee
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 64 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608)

### `Changed`

### `Fixed`
Expand Down
20 changes: 10 additions & 10 deletions conf/modules/prepare_references.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ process {
ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")}
}

withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
}

withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
}

withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
Expand All @@ -67,8 +67,8 @@ process {
ext.when = {!params.mt_fasta}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) }
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) }
}

withName: '.*PREPARE_REFERENCES:GATK_SD' {
Expand All @@ -79,8 +79,8 @@ process {
ext.args = { "--interval-file-name ${meta.id}_mt" }
}

withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes)}
withName: '.*PREPARE_REFERENCES:GATK_SD_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes)}
}

withName: '.*PREPARE_REFERENCES:TABIX_DBSNP' {
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
<summary>Output files</summary>

- `call_sv/genome`
- `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls.
- `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls. As of version 2.3.0, this file also contains mitochondrial structural variants.
- `<case_id>_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls.

</details>
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ The mandatory and optional parameters for each category are tabulated below.

<sup>1</sup>Default variant caller is DeepVariant, but you have the option to use Sentieon as well.<br />
<sup>2</sup>These parameters are only used by Sentieon.<br />
<sup>3</sup>Default is WGS, but you have the option to choose WES as well.<br />
<sup>3</sup>Default is `wgs`, but you have the option to choose `wes` or `mito` as well.<br />
<sup>4</sup>This parameter is only used by Deepvariant.<br />

##### 5. Variant calling - Structural variants
Expand Down
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@
"analysis_type": {
"type": "string",
"default": "wgs",
"description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'. This changes resources consumed and tools used.",
"description": "Specifies which analysis type for the pipeline- either 'wgs', 'wes' or 'mito'. This changes resources consumed and tools used.",
"fa_icon": "fas fa-align-center",
"enum": ["wgs", "wes"]
"enum": ["wgs", "wes", "mito"]
},
"bwa_as_fallback": {
"type": "boolean",
Expand Down
19 changes: 12 additions & 7 deletions subworkflows/local/align.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@ workflow ALIGN {
ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
val_mbuffer_mem // integer: [mandatory] memory in megabytes
val_platform // string: [mandatory] illumina or a different technology
val_sort_threads // integer: [mandatory] number of sorting threads
Expand Down Expand Up @@ -83,7 +88,7 @@ workflow ALIGN {

// PREPARING READS FOR MT ALIGNMENT

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CONVERT_MT_BAM_TO_FASTQ (
ch_genome_bam_bai,
ch_genome_fasta,
Expand All @@ -94,11 +99,11 @@ workflow ALIGN {
ALIGN_MT (
CONVERT_MT_BAM_TO_FASTQ.out.fastq,
CONVERT_MT_BAM_TO_FASTQ.out.bam,
ch_genome_bwaindex,
ch_genome_bwamem2index,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai
ch_mt_bwaindex,
ch_mt_bwamem2index,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai
)

ALIGN_MT_SHIFT (
Expand Down
23 changes: 13 additions & 10 deletions subworkflows/local/call_snv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ workflow CALL_SNV {
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mt_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
Expand All @@ -46,7 +49,7 @@ workflow CALL_SNV {
ch_sentieon_gvcf = Channel.empty()
ch_sentieon_gtbi = Channel.empty()

if (params.variant_caller.equals("deepvariant")) {
if (params.variant_caller.equals("deepvariant") && !params.analysis_type.equals("mito")) {
CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant
ch_genome_bam_bai,
ch_genome_fasta,
Expand Down Expand Up @@ -97,12 +100,12 @@ workflow CALL_SNV {
ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi
ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true)

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SNV_MT(
ch_mt_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dictionary,
ch_mt_fasta,
ch_mt_fai,
ch_mt_dictionary,
ch_mt_intervals
)

Expand All @@ -117,9 +120,9 @@ workflow CALL_SNV {
POSTPROCESS_MT_CALLS(
CALL_SNV_MT.out.vcf,
CALL_SNV_MT_SHIFT.out.vcf,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai,
ch_mtshift_backchain,
ch_case_info,
ch_foundin_header,
Expand Down
47 changes: 27 additions & 20 deletions subworkflows/local/call_structural_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@ workflow CALL_STRUCTURAL_VARIANTS {

main:
ch_versions = Channel.empty()
ch_merged_svs = Channel.empty()
ch_merged_tbi = Channel.empty()

CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
if (!params.analysis_type.equals("mito")) {
CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
}

if (params.analysis_type.equals("wgs")) {
CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
Expand All @@ -61,7 +66,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
}

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
}
Expand All @@ -74,39 +79,41 @@ workflow CALL_STRUCTURAL_VARIANTS {
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.toList()
.set { vcf_list }
}
} else if (params.analysis_type.equals("wgs")){
} else if (params.analysis_type.equals("wgs")) {
tiddit_vcf
.combine(manta_vcf)
.combine(gcnvcaller_vcf)
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.combine(gcnvcaller_vcf)
.toList()
.set { vcf_list }
}

ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }
if (!params.analysis_type.equals("mito")) {
ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }

SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)
SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)

TABIX_TABIX (SVDB_MERGE.out.vcf)

ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
TABIX_TABIX (SVDB_MERGE.out.vcf)
ch_merged_svs = SVDB_MERGE.out.vcf
ch_merged_tbi = TABIX_TABIX.out.tbi
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
}

emit:
vcf = SVDB_MERGE.out.vcf // channel: [ val(meta), path(vcf)]
tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi)]
versions = ch_versions // channel: [ path(versions.yml) ]
vcf = ch_merged_svs // channel: [ val(meta), path(vcf)]
tbi = ch_merged_tbi // channel: [ val(meta), path(tbi)]
versions = ch_versions // channel: [ path(versions.yml) ]
}
34 changes: 25 additions & 9 deletions subworkflows/local/prepare_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@
//

include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main'
include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main'
include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main'
Expand Down Expand Up @@ -66,11 +69,16 @@ workflow PREPARE_REFERENCES {
// MT genome indices
SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai)
ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect()
SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT_SHIFT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
SAMTOOLS_FAIDX_MT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT.out.fai, GATK_SD_MT.out.dict)

// MT alignment indices
BWAMEM2_INDEX_MT(ch_mt_fasta_in)
BWA_INDEX_MT(ch_mt_fasta_in)
SENTIEON_BWAINDEX_MT(ch_mt_fasta_in)
ch_bwa_mt = Channel.empty().mix(SENTIEON_BWAINDEX_MT.out.index, BWA_INDEX_MT.out.index).collect()

BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
Expand Down Expand Up @@ -140,9 +148,12 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(GATK_SD.out.versions)
ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_EXTRACT_MT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
Expand All @@ -167,11 +178,16 @@ workflow PREPARE_REFERENCES {
genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ]
mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ]
mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ]
mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ]
mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ]
mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]

Expand Down
Loading

0 comments on commit c3aacee

Please sign in to comment.