diff --git a/CHANGELOG.md b/CHANGELOG.md index 338b7eab..05d9614c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- A new aligner, bwameme [#553](https://github.com/nf-core/raredisease/pull/553) - A new parameter `run_mt_for_wes` to turn on mitochondrial analysis for targeted analysis [#552](https://github.com/nf-core/raredisease/pull/552) - A new parameter `bwa_as_fallback` to switch aligner to bwa in case bwamem2 fails [#551](https://github.com/nf-core/raredisease/pull/551) @@ -25,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Old parameter | New parameter | | ------------- | --------------- | +| | bwameme | | | bwa_as_fallback | | | run_mt_for_wes | diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config index 6ec96d0d..a616272b 100644 --- a/conf/modules/align_MT.config +++ b/conf/modules/align_MT.config @@ -19,16 +19,25 @@ process { withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } + ext.prefix = { "${meta.id}_sorted" } + } + + withName: '.*ALIGN_MT:BWAMEME_MEM_MT' { + ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } withName: '.*ALIGN_MT:BWA_MEM_MT' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } @@ -67,16 +76,25 @@ process { withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } + ext.prefix = { "${meta.id}_sorted_shifted" } + } + + withName: 
'.*ALIGN_MT_SHIFT:BWAMEME_MEM_MT' { + ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted_shifted" } } withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted_shifted" } } withName: '.*ALIGN_MT_SHIFT:BWA_MEM_MT' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted_shifted" } } diff --git a/conf/modules/align_bwa_bwamem2.config b/conf/modules/align_bwa_bwamem2_bwameme.config similarity index 73% rename from conf/modules/align_bwa_bwamem2.config rename to conf/modules/align_bwa_bwamem2_bwameme.config index 2ff7ea89..c217f8da 100644 --- a/conf/modules/align_bwa_bwamem2.config +++ b/conf/modules/align_bwa_bwamem2_bwameme.config @@ -17,33 +17,40 @@ process { - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:BWAMEM2_MEM' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:BWAMEM2_MEM' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:BWAMEM_FALLBACK' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:BWAMEM_FALLBACK' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:BWA_MEM' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:BWAMEME_MEM' { + ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } + ext.args2 = { "-T ./samtools_sort_tmp" } + ext.prefix = { "${meta.id}_sorted" } + ext.when = { params.aligner.equals("bwameme") } + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:BWA_MEM' { ext.args = { "-M -K 100000000 -R ${meta.read_group}" } ext.args2 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } - withName: 
'.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_STATS' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:SAMTOOLS_STATS' { ext.args = '-s --remove-overlaps' } - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_MERGE' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}_sorted_merged" } } - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:MARKDUPLICATES' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:MARKDUPLICATES' { ext.args = "--TMP_DIR ." ext.prefix = { "${meta.id}_sorted_md" } publishDir = [ @@ -54,7 +61,7 @@ process { ] } - withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_INDEX_MARKDUP' { + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:SAMTOOLS_INDEX_MARKDUP' { publishDir = [ enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index efcea90c..9ec4d47b 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -29,10 +29,20 @@ process { ext.when = {!params.bwamem2 && params.aligner == "bwamem2"} } + withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_GENOME' { + ext.args = '-a meme' + ext.when = {!params.bwameme && params.aligner == "bwameme"} + } + withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "bwamem2"} } + withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_MT_SHIFT' { + ext.args = '-a meme' + ext.when = { (!(params.analysis_type == "wes") || params.run_mt_for_wes) && params.aligner == "bwameme"} + } + withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { ext.when = {!params.bwa && params.aligner == "sentieon"} } diff --git a/docs/images/raredisease_metromap_dark.pdf b/docs/images/raredisease_metromap_dark.pdf index bb8df737..030e64e0 100644 Binary files a/docs/images/raredisease_metromap_dark.pdf and b/docs/images/raredisease_metromap_dark.pdf differ diff --git a/docs/images/raredisease_metromap_dark.png 
b/docs/images/raredisease_metromap_dark.png index ece6bb7d..1d5faec2 100644 Binary files a/docs/images/raredisease_metromap_dark.png and b/docs/images/raredisease_metromap_dark.png differ diff --git a/docs/images/raredisease_metromap_dark.svg b/docs/images/raredisease_metromap_dark.svg index 60ff7c05..02b513c1 100644 --- a/docs/images/raredisease_metromap_dark.svg +++ b/docs/images/raredisease_metromap_dark.svg @@ -10,7 +10,7 @@ xml:space="preserve" inkscape:version="1.3.2 (091e20e, 2023-11-25)" sodipodi:docname="raredisease_metromap_dark.svg" - inkscape:export-filename="raredisease_metromap_dark.png" + inkscape:export-filename="raredisease_metromap_dark.pdf" inkscape:export-xdpi="96" inkscape:export-ydpi="96" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" @@ -28,14 +28,14 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="0.23190539" - inkscape:cx="1634.2872" - inkscape:cy="1187.9845" - inkscape:window-width="1440" - inkscape:window-height="872" - inkscape:window-x="0" - inkscape:window-y="0" - inkscape:window-maximized="1" + inkscape:zoom="0.29297896" + inkscape:cx="1926.7595" + inkscape:cy="762.85341" + inkscape:window-width="1920" + inkscape:window-height="1052" + inkscape:window-x="1710" + inkscape:window-y="28" + inkscape:window-maximized="0" inkscape:current-layer="layer1" showguides="true" inkscape:export-bgcolor="#272829ff">sentieon-bwasentieon-dedupmarkduplicatesbwamem2bwamantaalignment to mitochondriaalignment to mitochondriabwamem2/sentieon/bwabwamem2/sentieon/bwamarkduplicatesmarkduplicatesmutect2mutect2alignment to shifted mitochondriaalignment to shifted mitochondriasentieon-dnascopesentieon-dnascopesentieon-dnamodelapplysentieon-dnamodelapplydeepvariantglnexusdeepvariantglnexusbcftools - rohbcftools - rohstrangerstrangerupdupdvcfannovcfannocaddcaddvepvepgenmodgenmodcaddcaddvepvephmtnotehmtnotegenmodgenmodmanta + id="text16-5-0-9-5-5-5-04" + 
transform="matrix(1.3718931,-0.01669353,0.01669353,1.3718931,438.73449,-78.364966)">manta + + x="-95.560486" + y="292.85658" + id="tspan13"> + tiddit + x="-95.560486" + y="299.03019" + id="tspan14">tiddit + + x="-95.560486" + y="305.20381" + id="tspan15"> + cnvnatorcnvnatorvepvepsvdb-querysvdb-querygermlinecnvcallergermlinecnvcallergenmodgenmodexpansionhunterexpansionhuntermultiqcmultiqcpicardtools+mosdepthpicardtools+mosdepthvcfannovcfannobambam vcf vcf vcf vcffastqfastqreferenceseklipsedefault path alternative pathskippable pathsretroseq vcfvepsvdb-querymarkduplicatessentieon-bwasentieon-dedupbwa/bwamem2/bwamemefastqbamfastqbam vcf vcf vcf vcfeklipsedefault path alternative pathskippable pathsmantaretroseq vcfreferencesvepsvdb-query + inkscape:connector-curvature="0" + d="m 56.147034,260.93382 h 12.22215 v 5.39866 c 0,0.29149 0.236487,0.5276 0.52759,0.5276 h 4.660723 v 2.64321 H 56.147034 Z m 17.410508,24.51077 H 56.147034 v -5.08297 h 17.410463 v 5.08297 z" + id="path6276-4-8-2-4" /> diff --git a/docs/images/raredisease_metromap_light.pdf b/docs/images/raredisease_metromap_light.pdf index 2ae11fff..ac9e3918 100644 Binary files a/docs/images/raredisease_metromap_light.pdf and b/docs/images/raredisease_metromap_light.pdf differ diff --git a/docs/images/raredisease_metromap_light.png b/docs/images/raredisease_metromap_light.png index ce8f63b4..b1d0c3c6 100644 Binary files a/docs/images/raredisease_metromap_light.png and b/docs/images/raredisease_metromap_light.png differ diff --git a/docs/images/raredisease_metromap_light.svg b/docs/images/raredisease_metromap_light.svg index 7390dfb5..c761ed36 100644 --- a/docs/images/raredisease_metromap_light.svg +++ b/docs/images/raredisease_metromap_light.svg @@ -28,13 +28,13 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="0.31207156" - inkscape:cx="1339.4364" - inkscape:cy="860.37958" - inkscape:window-width="1440" - inkscape:window-height="872" - inkscape:window-x="0" - 
inkscape:window-y="0" + inkscape:zoom="0.31621395" + inkscape:cx="1563.8146" + inkscape:cy="1098.9395" + inkscape:window-width="1920" + inkscape:window-height="1052" + inkscape:window-x="1710" + inkscape:window-y="28" inkscape:window-maximized="0" inkscape:current-layer="layer1" showguides="true" @@ -139,874 +139,13 @@ id="path26-7" />alignment to mitochondriabwamem2/sentieon/bwamarkduplicatesmutect2alignment to shifted mitochondriasentieon-dnascopesentieon-dnamodelapplydeepvariantglnexusbcftools - rohstrangerupdvcfannocaddvepgenmodcaddvephmtnotegenmodmanta - + -tiddit - + -cnvnatorvepsvdb-querygermlinecnvcallergenmodexpansionhuntermultiqcpicardtools+mosdepthvcfannobambam vcf vcf vcf vcfeklipseretroseq vcfdefault path alternative pathskippable pathssentieon-bwasentieon-dedupmarkduplicatesbwamem2mantabwaalignment to mitochondriabwamem2/sentieon/bwamarkduplicatesmutect2alignment to shifted mitochondriasentieon-dnascopesentieon-dnamodelapplydeepvariantglnexusbcftools - rohstrangerupdvcfannocaddvepgenmodcaddvephmtnotegenmodmanta + + +tiddit + + +cnvnatorvepsvdb-querygermlinecnvcallergenmodexpansionhuntermultiqcpicardtools+mosdepthvcfanno vcf vcf vcf vcfeklipseretroseq vcfdefault path alternative pathskippable pathsmarkduplicatessentieon-bwasentieon-dedupbwa/bwamem2/bwamemefastqfastqfastqfastqreferencesreferencesmantavepsvdb-query + d="m 74.145119,274.25217 h 12.22215 v 5.39866 c 0,0.29149 0.236487,0.5276 0.52759,0.5276 h 4.660723 v 2.64321 H 74.145119 Z m 17.410508,24.51077 H 74.145119 v -5.08297 h 17.410463 v 5.08297 z" + id="path6276-4-8-2" />vepsvdb-querybambam diff --git a/main.nf b/main.nf index aa92e744..838d75d7 100644 --- a/main.nf +++ b/main.nf @@ -21,6 +21,7 @@ params.fasta = getGenomeAttribute('fasta') params.fai = getGenomeAttribute('fai') params.bwa = getGenomeAttribute('bwa') params.bwamem2 = getGenomeAttribute('bwamem2') +params.bwameme = getGenomeAttribute('bwameme') params.call_interval = getGenomeAttribute('call_interval') params.cadd_resources = 
getGenomeAttribute('cadd_resources') params.gcnvcaller_model = getGenomeAttribute('gcnvcaller_model') diff --git a/modules.json b/modules.json index 2e29bd6f..e23ee838 100644 --- a/modules.json +++ b/modules.json @@ -75,6 +75,16 @@ "git_sha": "74363e1acc38eaedeede8d429477397c1a6f9e18", "installed_by": ["modules"] }, + "bwameme/index": { + "branch": "master", + "git_sha": "79480293280ff4f10f30bdea1ddd903f223f8489", + "installed_by": ["modules"] + }, + "bwameme/mem": { + "branch": "master", + "git_sha": "79480293280ff4f10f30bdea1ddd903f223f8489", + "installed_by": ["modules"] + }, "cadd": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/bwameme/index/environment.yml b/modules/nf-core/bwameme/index/environment.yml new file mode 100644 index 00000000..5d76ea18 --- /dev/null +++ b/modules/nf-core/bwameme/index/environment.yml @@ -0,0 +1,7 @@ +name: "bwameme_index" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::bwa-meme=1.0.6" diff --git a/modules/nf-core/bwameme/index/main.nf b/modules/nf-core/bwameme/index/main.nf new file mode 100644 index 00000000..870f494c --- /dev/null +++ b/modules/nf-core/bwameme/index/main.nf @@ -0,0 +1,61 @@ +process BWAMEME_INDEX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bwa-meme:1.0.6--hdcf5f25_2': + 'biocontainers/bwa-meme:1.0.6--hdcf5f25_2' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwameme"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta}" + def VERSION = '1.0.6' // WARN: Version information provided by tool on CLI is incorrect. 
Please update this string when bumping container versions. + """ + mkdir bwameme + + bwa-meme index \\ + $args \\ + -t $task.cpus \\ + -p bwameme/$prefix \\ + $fasta + + build_rmis_dna.sh bwameme/$prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwameme: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + def VERSION = '1.0.6' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + """ + mkdir bwameme + touch bwameme/${prefix}.0123 + touch bwameme/${prefix}.ann + touch bwameme/${prefix}.pac + touch bwameme/${prefix}.amb + touch bwameme/${prefix}.pos_packed + touch bwameme/${prefix}.suffixarray_uint64 + touch bwameme/${prefix}.suffixarray_uint64_L0_PARAMETERS + touch bwameme/${prefix}.suffixarray_uint64_L1_PARAMETERS + touch bwameme/${prefix}.suffixarray_uint64_L2_PARAMETERS + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwameme: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwameme/index/meta.yml b/modules/nf-core/bwameme/index/meta.yml new file mode 100644 index 00000000..2db1d3a8 --- /dev/null +++ b/modules/nf-core/bwameme/index/meta.yml @@ -0,0 +1,44 @@ +name: "bwameme_index" +description: Create BWA-MEME index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - "bwameme": + description: "Faster BWA-MEM2 using learned-index" + homepage: https://github.com/kaist-ina/BWA-MEME + documentation: https://github.com/kaist-ina/BWA-MEME#getting-started + doi: "10.1093/bioinformatics/btac137" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: Input genome fasta file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: file + description: BWA-MEME genome index files + pattern: "*.{0123,amb,ann,pac,pos_packed,suffixarray_uint64,suffixarray_uint64_L0_PARAMETERS,suffixarray_uint64_L1_PARAMETERS,suffixarray_uint64_L2_PARAMETERS}" + +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/bwameme/index/tests/main.nf.test b/modules/nf-core/bwameme/index/tests/main.nf.test new file mode 100644 index 00000000..b61f7660 --- /dev/null +++ b/modules/nf-core/bwameme/index/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process BWAMEME_INDEX" + script "../main.nf" + process "BWAMEME_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "bwameme" + tag "bwameme/index" + + config "./nextflow.config" + + test("BWAMEME index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BWAMEME index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwameme/index/tests/main.nf.test.snap b/modules/nf-core/bwameme/index/tests/main.nf.test.snap new file mode 100644 index 00000000..e608af8b --- /dev/null +++ b/modules/nf-core/bwameme/index/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "BWAMEME index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pos_packed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L0_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L1_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L2_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,c80b8097b8a9c022e4c1e2617771ea3d" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pos_packed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L0_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L1_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.suffixarray_uint64_L2_PARAMETERS:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,c80b8097b8a9c022e4c1e2617771ea3d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T13:44:38.551252246" + }, + "BWAMEME index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.fasta.pos_packed:md5,3534bc04a547d02d6e4cb50908a40db9", + 
"genome.fasta.suffixarray_uint64:md5,51abb48f687661f88bf5bf5d370521e3", + "genome.fasta.suffixarray_uint64_L0_PARAMETERS:md5,ad6397d3d33bbb6a31b1320349e23274", + "genome.fasta.suffixarray_uint64_L1_PARAMETERS:md5,6c0d6dc7e733a7f373aa7b2730621aa4", + "genome.fasta.suffixarray_uint64_L2_PARAMETERS:md5,b2d4bad4e9f0e8960a0af12b7038ab1e" + ] + ] + ], + "1": [ + "versions.yml:md5,c80b8097b8a9c022e4c1e2617771ea3d" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.fasta.pos_packed:md5,3534bc04a547d02d6e4cb50908a40db9", + "genome.fasta.suffixarray_uint64:md5,51abb48f687661f88bf5bf5d370521e3", + "genome.fasta.suffixarray_uint64_L0_PARAMETERS:md5,ad6397d3d33bbb6a31b1320349e23274", + "genome.fasta.suffixarray_uint64_L1_PARAMETERS:md5,6c0d6dc7e733a7f373aa7b2730621aa4", + "genome.fasta.suffixarray_uint64_L2_PARAMETERS:md5,b2d4bad4e9f0e8960a0af12b7038ab1e" + ] + ] + ], + "versions": [ + "versions.yml:md5,c80b8097b8a9c022e4c1e2617771ea3d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T13:44:25.040725565" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwameme/index/tests/nextflow.config b/modules/nf-core/bwameme/index/tests/nextflow.config new file mode 100644 index 00000000..5934b7d1 --- /dev/null +++ b/modules/nf-core/bwameme/index/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BWAMEME_INDEX { + ext.args = '-a meme' + } + +} diff --git a/modules/nf-core/bwameme/index/tests/tags.yml b/modules/nf-core/bwameme/index/tests/tags.yml new file mode 100644 index 00000000..ddbcab04 --- /dev/null +++ b/modules/nf-core/bwameme/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwameme/index: + - "modules/nf-core/bwameme/index/**" diff --git 
a/modules/nf-core/bwameme/mem/environment.yml b/modules/nf-core/bwameme/mem/environment.yml new file mode 100644 index 00000000..388eedd2 --- /dev/null +++ b/modules/nf-core/bwameme/mem/environment.yml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "bwameme_mem" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::bwa-meme=1.0.6" + - "bioconda::mbuffer=20160228" + - "bioconda::samtools=1.20" diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf new file mode 100644 index 00000000..db41316e --- /dev/null +++ b/modules/nf-core/bwameme/mem/main.nf @@ -0,0 +1,94 @@ +process BWAMEME_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ed29b84fa94419f5a7bf6a841ddbcb964768825b:139b5e403886ad278b9ad139174967441c1c6ff3-0': + 'biocontainers/mulled-v2-ed29b84fa94419f5a7bf6a841ddbcb964768825b:139b5e403886ad278b9ad139174967441c1c6ff3-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + def mbuffer_mem = 3072 + if (!task.memory) { + log.info '[bwameme-mbuffer] Available memory not known - defaulting to 3GB for mbuffer. Specify process memory requirements to change this.' + } else { + mbuffer_mem = (task.memory.mega*0.5).intValue() + } + def mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" + def mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : "" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + def VERSION = '1.0.6' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-meme \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + $mbuffer_command \\ + | samtools $samtools_command $args2 $mem_per_thread -@ $task.cpus ${reference} -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwameme: $VERSION + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + def VERSION = '1.0.6' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + """ + touch ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwameme: $VERSION + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml new file mode 100644 index 00000000..c7eb7b28 --- /dev/null +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -0,0 +1,90 @@ +name: "bwameme_mem" +description: Performs fastq alignment to a fasta reference using BWA-MEME +keywords: + - mem + - bwa + - bwamem2 + - bwameme + - alignment + - map + - fastq + - bam + - sam + - cram +tools: + - "bwameme": + description: "Faster BWA-MEM2 using learned-index" + homepage: https://github.com/kaist-ina/BWA-MEME + documentation: https://github.com/kaist-ina/BWA-MEME#getting-started + doi: "10.1093/bioinformatics/btac137" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sam: + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - cram: + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - crai: + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + - csi: + type: file + description: Index file for BAM file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test new file mode 100644 index 00000000..3b67b39e --- /dev/null +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -0,0 +1,239 @@ +nextflow_process { + + name "Test Process BWAMEME_MEM" + script "../main.nf" + process "BWAMEME_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwameme" + tag "bwameme/mem" + tag "bwameme/index" + config "./nextflow.config" + + test("sarscov2 - fastq, index, fasta, false") { + + setup { + run("BWAMEME_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEME_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + setup { + run("BWAMEME_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEME_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + setup { + run("BWAMEME_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEME_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + setup { + run("BWAMEME_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEME_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + + setup { + run("BWAMEME_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map 
+ [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEME_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..281011ae --- /dev/null +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -0,0 +1,67 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:04:31.962017214" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T19:28:46.895668666" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:44:56.510177191" + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:00:05.782384898" + }, + "sarscov2 - fastq, index, fasta, 
true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-15T20:44:05.2657749" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwameme/mem/tests/nextflow.config b/modules/nf-core/bwameme/mem/tests/nextflow.config new file mode 100644 index 00000000..e7dd707e --- /dev/null +++ b/modules/nf-core/bwameme/mem/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + withName: BWAMEME_INDEX { + ext.args = '-a meme' + } + withName: BWAMEME_MEM { + ext.args = '-7' + } +} diff --git a/modules/nf-core/bwameme/mem/tests/tags.yml b/modules/nf-core/bwameme/mem/tests/tags.yml new file mode 100644 index 00000000..9e3a26cf --- /dev/null +++ b/modules/nf-core/bwameme/mem/tests/tags.yml @@ -0,0 +1,2 @@ +bwameme/mem: + - "modules/nf-core/bwameme/mem/**" diff --git a/nextflow.config b/nextflow.config index 63b2b529..e5f507ba 100644 --- a/nextflow.config +++ b/nextflow.config @@ -326,7 +326,7 @@ includeConfig 'conf/modules/qc_bam.config' includeConfig 'conf/modules/rank_variants.config' includeConfig 'conf/modules/scatter_genome.config' includeConfig 'conf/modules/align_MT.config' -includeConfig 'conf/modules/align_bwa_bwamem2.config' +includeConfig 'conf/modules/align_bwa_bwamem2_bwameme.config' includeConfig 'conf/modules/align_sentieon.config' includeConfig 'conf/modules/annotate_cadd.config' includeConfig 'conf/modules/call_snv_MT.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index c2e03fd6..d2f38575 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -74,6 +74,14 @@ "help_text": "If none provided, will be generated automatically from the FASTA reference.", "fa_icon": "fas fa-folder-open" }, + "bwameme": { + "type": "string", + "exists": true, + "format": "directory-path", + "description": "Directory for pre-built bwameme's learned index.", + "help_text": "If none provided, will be generated automatically 
from the FASTA reference.", + "fa_icon": "fas fa-folder-open" + }, "cadd_resources": { "type": "string", "exists": true, @@ -590,7 +598,7 @@ "default": "bwamem2", "description": "Specifies the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.", "fa_icon": "fas fa-align-center", - "enum": ["bwa", "bwamem2", "sentieon"] + "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] }, "min_trimmed_length": { "type": "integer", diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index da1c7ab6..8822d1cb 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -3,7 +3,7 @@ // include { FASTP } from '../../modules/nf-core/fastp/main' -include { ALIGN_BWA_BWAMEM2 } from './alignment/align_bwa_bwamem2' +include { ALIGN_BWA_BWAMEM2_BWAMEME } from './alignment/align_bwa_bwamem2_bwameme' include { ALIGN_SENTIEON } from './alignment/align_sentieon' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { ALIGN_MT } from './alignment/align_MT' @@ -17,9 +17,11 @@ workflow ALIGN { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ] ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ] ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ] ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] @@ -46,18 +48,19 @@ workflow ALIGN { ch_fastp_json = FASTP.out.json } - if (params.aligner.equals("bwamem2") || params.aligner.equals("bwa")) { - 
ALIGN_BWA_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2 or bwa + if (params.aligner.matches("bwamem2|bwa|bwameme")) { + ALIGN_BWA_BWAMEM2_BWAMEME ( // Triggered when params.aligner is set as bwamem2 or bwa or bwameme ch_reads, ch_genome_bwaindex, ch_genome_bwamem2index, + ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_fai, val_platform ) - ch_bwamem2_bam = ALIGN_BWA_BWAMEM2.out.marked_bam - ch_bwamem2_bai = ALIGN_BWA_BWAMEM2.out.marked_bai - ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2.out.versions) + ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam + ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai + ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2_BWAMEME.out.versions) } else if (params.aligner.equals("sentieon")) { ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon ch_reads, @@ -68,7 +71,7 @@ workflow ALIGN { ) ch_sentieon_bam = ALIGN_SENTIEON.out.marked_bam ch_sentieon_bai = ALIGN_SENTIEON.out.marked_bai - ch_versions = ch_versions.mix(ALIGN_SENTIEON.out.versions) + ch_versions = ch_versions.mix(ALIGN_SENTIEON.out.versions) } ch_genome_marked_bam = Channel.empty().mix(ch_bwamem2_bam, ch_sentieon_bam) @@ -76,6 +79,7 @@ workflow ALIGN { ch_genome_bam_bai = ch_genome_marked_bam.join(ch_genome_marked_bai, failOnMismatch:true, failOnDuplicate:true) // PREPARING READS FOR MT ALIGNMENT + if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) { CONVERT_MT_BAM_TO_FASTQ ( ch_genome_bam_bai, @@ -89,6 +93,7 @@ workflow ALIGN { CONVERT_MT_BAM_TO_FASTQ.out.bam, ch_genome_bwaindex, ch_genome_bwamem2index, + ch_genome_bwamemeindex, ch_genome_fasta, ch_genome_dictionary, ch_genome_fai @@ -99,6 +104,7 @@ workflow ALIGN { CONVERT_MT_BAM_TO_FASTQ.out.bam, ch_mtshift_bwaindex, ch_mtshift_bwamem2index, + ch_mtshift_bwamemeindex, ch_mtshift_fasta, ch_mtshift_dictionary, ch_mtshift_fai diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf index 1bb5c56f..9121bc2d 100644 --- 
a/subworkflows/local/alignment/align_MT.nf +++ b/subworkflows/local/alignment/align_MT.nf @@ -5,6 +5,7 @@ include { BWA_MEM as BWA_MEM_MT } from '../../../modules/nf-core/bwa/mem/main' include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' +include { BWAMEME_MEM as BWAMEME_MEM_MT } from '../../../modules/nf-core/bwameme/mem/main' include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' @@ -17,6 +18,7 @@ workflow ALIGN_MT { ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] + ch_bwamemeindex // channel: [mandatory for bwameme] [ val(meta), path(index) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_dict // channel: [mandatory] [ val(meta), path(dict) ] ch_fai // channel: [mandatory] [ val(meta), path(fai) ] @@ -36,6 +38,10 @@ workflow ALIGN_MT { BWA_MEM_MT ( ch_fastq, ch_bwaindex, true ) ch_align = BWA_MEM_MT.out.bam ch_versions = ch_versions.mix(BWA_MEM_MT.out.versions.first()) + } else if (params.aligner.equals("bwameme")) { + BWAMEME_MEM_MT (ch_fastq, ch_bwamemeindex, ch_fasta, true) + ch_align = BWAMEME_MEM_MT.out.bam + ch_versions = ch_versions.mix(BWAMEME_MEM_MT.out.versions.first()) } ch_align .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) diff --git a/subworkflows/local/alignment/align_bwa_bwamem2.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf similarity index 90% rename from 
subworkflows/local/alignment/align_bwa_bwamem2.nf rename to subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index b846cad3..87e9a958 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -5,6 +5,7 @@ include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' include { BWA_MEM as BWAMEM_FALLBACK } from '../../../modules/nf-core/bwa/mem/main' include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' +include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MARKDUP } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' @@ -12,11 +13,12 @@ include { SAMTOOLS_MERGE } from '../../../modules/nf-c include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' -workflow ALIGN_BWA_BWAMEM2 { +workflow ALIGN_BWA_BWAMEM2_BWAMEME { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] ch_bwa_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] ch_bwamem2_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] + ch_bwameme_index // channel: [mandatory] [ val(meta), path(bwameme_index) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] val_platform // string: [mandatory] default: illumina @@ -29,6 +31,10 @@ workflow ALIGN_BWA_BWAMEM2 { BWA_MEM ( ch_reads_input, ch_bwa_index, true ) ch_align = BWA_MEM.out.bam ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + } else if (params.aligner.equals("bwameme")) { + BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true ) + ch_align = BWAMEME_MEM.out.bam + ch_versions =
ch_versions.mix(BWAMEME_MEM.out.versions.first()) } else { BWAMEM2_MEM ( ch_reads_input, ch_bwamem2_index, true ) ch_align = BWAMEM2_MEM.out.bam @@ -49,7 +55,6 @@ workflow ALIGN_BWA_BWAMEM2 { ch_align = ch_fallback.SUCCESS.mix(BWAMEM_FALLBACK.out.bam) ch_versions = ch_versions.mix(BWAMEM_FALLBACK.out.versions.first()) } - } SAMTOOLS_INDEX_ALIGN ( ch_align ) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 64d33248..2bd4b6dd 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -6,6 +6,8 @@ include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modul include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main' +include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main' +include { BWAMEME_INDEX as BWAMEME_INDEX_MT_SHIFT } from '../../modules/nf-core/bwameme/index/main' include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main' include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main' @@ -45,24 +47,27 @@ workflow PREPARE_REFERENCES { ch_sentieonbwa = Channel.empty() // Genome indices - BWA_INDEX_GENOME(ch_genome_fasta).index.set{ch_bwa} - BWAMEM2_INDEX_GENOME(ch_genome_fasta) - SENTIEON_BWAINDEX_GENOME(ch_genome_fasta).index.set{ch_sentieonbwa} SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]]) GATK_SD(ch_genome_fasta) ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect() GET_CHROM_SIZES( ch_fai ) - ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] } - .set {ch_rtgformat_in} - 
RTGTOOLS_FORMAT(ch_rtgformat_in) - // MT indices + // Genome alignment indices + BWA_INDEX_GENOME(ch_genome_fasta).index.set{ch_bwa} + BWAMEM2_INDEX_GENOME(ch_genome_fasta) + BWAMEME_INDEX_GENOME(ch_genome_fasta) + SENTIEON_BWAINDEX_GENOME(ch_genome_fasta).index.set{ch_sentieonbwa} + + // MT genome indices SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai) ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect() SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]]) GATK_SD_MT_SHIFT(ch_mt_fasta_in) GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict) + + // MT alignment indices BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) + BWAMEME_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa) ch_bwa_mtshift = Channel.empty().mix(SENTIEON_BWAINDEX_MT_SHIFT.out.index, BWA_INDEX_MT_SHIFT.out.index).collect() @@ -79,7 +84,6 @@ workflow PREPARE_REFERENCES { // Vcf, tab and bed indices TABIX_DBSNP(ch_known_dbsnp) - ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) TABIX_GNOMAD_AF(ch_gnomad_af_tab) TABIX_PT(ch_target_bed).tbi.set { ch_tbi } TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi } @@ -101,9 +105,15 @@ workflow PREPARE_REFERENCES { GATK_PREPROCESS_WGS (ch_genome_fasta, ch_fai, GATK_SD.out.dict, [[],[]], [[],[]]).set {ch_preprocwgs} GATK_PREPROCESS_WES (ch_genome_fasta, ch_fai, GATK_SD.out.dict, GATK_BILT.out.interval_list, [[],[]]).set {ch_preprocwes} + // RTG tools + ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] } + .set {ch_rtgformat_in} + RTGTOOLS_FORMAT(ch_rtgformat_in) + // Gather versions ch_versions = ch_versions.mix(BWA_INDEX_GENOME.out.versions) ch_versions = ch_versions.mix(BWAMEM2_INDEX_GENOME.out.versions) + ch_versions = ch_versions.mix(BWAMEME_INDEX_GENOME.out.versions) ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_GENOME.out.versions) ch_versions = 
ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions) ch_versions = ch_versions.mix(GATK_SD.out.versions) @@ -112,12 +122,14 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions) + ch_versions = ch_versions.mix(BWAMEME_INDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions) ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions) ch_versions = ch_versions.mix(TABIX_PT.out.versions) ch_versions = ch_versions.mix(TABIX_PBT.out.versions) + ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) ch_versions = ch_versions.mix(GATK_BILT.out.versions) ch_versions = ch_versions.mix(GATK_ILT.out.versions) ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions) @@ -129,6 +141,7 @@ workflow PREPARE_REFERENCES { emit: genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] + genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] genome_fai = ch_fai // channel: [ val(meta), path(fai) ] genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ] @@ -143,6 +156,7 @@ workflow PREPARE_REFERENCES { mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ] mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ] mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + mtshift_bwameme_index = BWAMEME_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), 
path(index) ] gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 17ac7f5c..26f62e24 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -212,6 +212,8 @@ workflow RAREDISEASE { : ch_references.genome_bwa_index ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() : ch_references.genome_bwamem2_index + ch_genome_bwamemeindex = params.bwameme ? Channel.fromPath(params.bwameme).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwameme_index ch_genome_chrsizes = ch_references.genome_chrom_sizes ch_genome_fai = ch_references.genome_fai ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() @@ -242,6 +244,7 @@ workflow RAREDISEASE { ch_mtshift_backchain = ch_references.mtshift_backchain ch_mtshift_bwaindex = ch_references.mtshift_bwa_index ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index + ch_mtshift_bwamemeindex = ch_references.mtshift_bwameme_index ch_mtshift_dictionary = ch_references.mtshift_dict ch_mtshift_fai = ch_references.mtshift_fai ch_mtshift_fasta = ch_references.mtshift_fasta @@ -355,9 +358,11 @@ workflow RAREDISEASE { ch_genome_fai, ch_genome_bwaindex, ch_genome_bwamem2index, + ch_genome_bwamemeindex, ch_genome_dictionary, ch_mtshift_bwaindex, ch_mtshift_bwamem2index, + ch_mtshift_bwamemeindex, ch_mtshift_fasta, ch_mtshift_dictionary, ch_mtshift_fai,