From 6054b770dc34b93d340b437a38b67a8b60b4c129 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:18:09 +0000 Subject: [PATCH 1/9] first design mirdeep --- .../nf-core/mirdeep2/mapper/environment.yml | 7 + modules/nf-core/mirdeep2/mapper/main.nf | 53 +++++++ modules/nf-core/mirdeep2/mapper/meta.yml | 49 ++++++ .../mirdeep2/mapper/tests/main.nf.test | 141 ++++++++++++++++++ .../mirdeep2/mapper/tests/main.nf.test.snap | 51 +++++++ .../mirdeep2/mapper/tests/nextflow.config | 11 ++ .../nf-core/mirdeep2/mirdeep2/environment.yml | 7 + modules/nf-core/mirdeep2/mirdeep2/main.nf | 59 ++++++++ modules/nf-core/mirdeep2/mirdeep2/meta.yml | 65 ++++++++ .../mirdeep2/mirdeep2/tests/main.nf.test | 105 +++++++++++++ .../mirdeep2/mirdeep2/tests/main.nf.test.snap | 57 +++++++ .../mirdeep2/mirdeep2/tests/nextflow.config | 5 + .../nf-core/fastq_find_mirna_mirdeep2/main.nf | 40 +++++ .../fastq_find_mirna_mirdeep2/meta.yml | 51 +++++++ .../tests/main.nf.test | 51 +++++++ .../tests/main.nf.test.snap | 50 +++++++ .../tests/nextflow.config | 16 ++ 17 files changed, 818 insertions(+) create mode 100644 modules/nf-core/mirdeep2/mapper/environment.yml create mode 100644 modules/nf-core/mirdeep2/mapper/main.nf create mode 100644 modules/nf-core/mirdeep2/mapper/meta.yml create mode 100644 modules/nf-core/mirdeep2/mapper/tests/main.nf.test create mode 100644 modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap create mode 100644 modules/nf-core/mirdeep2/mapper/tests/nextflow.config create mode 100644 modules/nf-core/mirdeep2/mirdeep2/environment.yml create mode 100644 modules/nf-core/mirdeep2/mirdeep2/main.nf create mode 100644 modules/nf-core/mirdeep2/mirdeep2/meta.yml create mode 100644 modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test create mode 100644 modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap create mode 100644 modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config create mode 100644 subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf create mode 100644 subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml create mode 100644 subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config diff --git a/modules/nf-core/mirdeep2/mapper/environment.yml b/modules/nf-core/mirdeep2/mapper/environment.yml new file mode 100644 index 00000000000..fafc6663255 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mirdeep2=2.0.1.2" diff --git a/modules/nf-core/mirdeep2/mapper/main.nf b/modules/nf-core/mirdeep2/mapper/main.nf new file mode 100644 index 00000000000..3d53b4e8d28 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/main.nf @@ -0,0 +1,53 @@ +process MIRDEEP2_MAPPER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': + 'biocontainers/mirdeep2:2.0.1.2--0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index, stageAs: '*') + + output: + tuple val(meta), path('*.fa'), path('*.arf'), emit: mirdeep2_inputs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + + """ + mapper.pl \\ + ${reads} \\ + $args \\ + -p ${index}/${meta2.id} \\ + -s ${prefix}_collapsed.fa \\ + -t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + """ + touch ${prefix}.fa + touch ${prefix}reads_vs_refdb.arf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ +} diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml new file mode 100644 index 00000000000..9f798e93970 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/meta.yml @@ -0,0 +1,49 @@ +name: "mirdeep2_mapper" +description: | + miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery. +keywords: + - mirdeep2 + - mapper + - RNA sequencing +tools: + - "mirdeep2": + description: | + miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis. + homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" + doi: "10.1093/nar/gkn491" + licence: ["GPL V3"] + +input: + - meta: + type: map + description: Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` + - reads: + type: file + description: File containing the raw sequencing reads that need to be collapsed and mapped to a reference genome. + pattern: "*.fa" + - meta2: + type: map + description: Groovy Map containing information about the genome index. + - index: + type: file + description: Path to the genome index file used for mapping the reads to the genome. + pattern: "*" + +output: + - meta: + type: map + description: Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` + - mirdeep2_inputs: + type: file + description: Outputs for miRDeep2 including the collapsed reads file (FASTA) and the mapped reads in ARF format. + pattern: "*" + - versions: + type: file + description: File containing software versions for tracking. + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test new file mode 100644 index 00000000000..0924f77124b --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test @@ -0,0 +1,141 @@ + +nextflow_process { + + name "Test Process MIRDEEP2_MAPPER" + script "../main.nf" + process "MIRDEEP2_MAPPER" + + tag "modules" + tag "modules_nfcore" + tag "mirdeep2" + tag "bowtie/build" + tag "mirdeep2/mapper" + tag "seqkit/fq2fa" + tag "seqkit/replace" + + + setup { + run("BOWTIE_BUILD") { + script "../../../bowtie/build/main.nf" + process { + """ + input[0] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + """ + } + } + + run("SEQKIT_FQ2FA") { + script "../../../seqkit/fq2fa/main.nf" + process { + """ + input[0] = [ + [ id:'small_Clone1_N1' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) + ] + """ + } + } + + run("SEQKIT_REPLACE") { + script "../../../seqkit/replace/main.nf" + config "./nextflow.config" + process { + """ + input[0] = SEQKIT_FQ2FA.out.fasta + """ + } + } + + } + + test("mirdeep2 - mapper - fasta celegans") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + + // md5sum not stable - IDs change while sequences are the same + + // Assert TCACCGGGGGTACATCAGCTAA occurs once + { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 }, + + // Assert seq_347479_x287 occurs once + { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 }, + + // Assert that specific content occurs 4 times + { assert file(process.out.mirdeep2_inputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 } + ) + } + + } + + test("mirdeep2 - mapper - fasta smrnaseq") { + config "./nextflow.config" + + when { + process { + """ + input[0] = SEQKIT_REPLACE.out.fastx + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + + // Assert reads occurs once + { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 }, + + // Assert ID occurs once + { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 } + + ) + } + + } + + test("mirdeep2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap new file mode 100644 index 00000000000..52d6933a67c --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "mirdeep2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_reads", + "single_end": false + }, + "test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ], + "mirdeep2_inputs": [ + [ + { + "id": "test_reads", + "single_end": false + }, + "test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T17:42:03.429229932" + }, + "mirdeep2 - mapper - fasta celegans": { + "content": [ + [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T17:41:05.101661825" + } +} \ No newline at end of file diff --git a/modules/nf-core/mirdeep2/mapper/tests/nextflow.config b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config new file mode 100644 index 00000000000..ec097561e60 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } + + withName: 'SEQKIT_REPLACE' { + ext.args = "-p '\s.+'" + ext.suffix = "fasta" + } + +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/environment.yml b/modules/nf-core/mirdeep2/mirdeep2/environment.yml new file mode 100644 index 00000000000..fafc6663255 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mirdeep2=2.0.1.2" diff --git a/modules/nf-core/mirdeep2/mirdeep2/main.nf b/modules/nf-core/mirdeep2/mirdeep2/main.nf new file mode 100644 index 00000000000..d2befc6a040 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/main.nf @@ -0,0 +1,59 @@ +process MIRDEEP2_MIRDEEP2 { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': + 'biocontainers/mirdeep2:2.0.1.2--0' }" + + input: + tuple val(meta), path(processed_reads), path(genome_mappings) + tuple val(meta2), path(fasta) + tuple val(meta3), path(mature), path(hairpin) + + output: + tuple val(meta), path("result*.{bed,csv,html}") , emit: outputs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + """ + miRDeep2.pl \\ + $processed_reads \\ + $fasta \\ + $genome_mappings \\ + $mature \\ + none \\ + $hairpin + + mv result_*.bed result_${prefix}.bed + mv result_*.csv result_${prefix}.csv + mv result_*.html result_${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + """ + touch result_${prefix}.html + touch result_${prefix}.bed + touch result_${prefix}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml new file mode 100644 index 00000000000..49e915dc814 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml @@ -0,0 +1,65 @@ +name: "mirdeep2_mirdeep2" +description: | + miRDeep2 is a tool for identifying known and novel miRNAs in deep sequencing data by analyzing sequenced RNAs. It integrates the mapping of sequencing reads to the genome and predicts miRNA precursors and mature miRNAs. +keywords: + - mirdeep2 + - miRNA + - RNA sequencing +tools: + - "mirdeep2": + description: | + miRDeep2 is a tool that discovers microRNA genes by analyzing sequenced RNAs. + It includes three main scripts: `miRDeep2.pl`, `mapper.pl`, and `quantifier.pl` for comprehensive miRNA detection and quantification. + homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" + doi: "10.1093/nar/gkn491" + licence: ["GPL V3"] + +input: + - meta: + type: map + description: Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` + - processed_reads: + type: file + description: FASTA file containing the processed sequencing reads. + pattern: "*.fa" + - genome_mappings: + type: file + description: ARF format file with mapped reads to the genome. + pattern: "*.arf" + - meta2: + type: map + description: Groovy Map for genome FASTA file metadata, e.g. `[ id:'genome']` + - fasta: + type: file + description: FASTA file of the corresponding genome. + pattern: "*.fa" + - meta3: + type: map + description: Groovy Map for miRNA metadata, e.g. `[ id:'mirbase', single_end:false ]` + - mature: + type: file + description: FASTA file containing known mature miRNAs of the species being analyzed. + pattern: "*.fa" + - hairpin: + type: file + description: FASTA file containing hairpin sequences (miRNA precursors). + pattern: "*.fa" + +output: + - meta: + type: map + description: Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` + - outputs: + type: file + description: Output files, including BED, CSV, and HTML results files with an overview of detected miRNAs. + pattern: "result*.{bed,csv,html}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test new file mode 100644 index 00000000000..3d3a769d318 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process MIRDEEP2_MIRDEEP2" + script "../main.nf" + process "MIRDEEP2_MIRDEEP2" + + tag "modules" + tag "modules_nfcore" + tag "mirdeep2" + tag "mirdeep2/mirdeep2" + tag "bowtie/build" + tag "mirdeep2/mapper" + + + setup { + run("BOWTIE_BUILD") { + script "../../../bowtie/build/main.nf" + process { + """ + input[0] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + """ + } + } + + run("MIRDEEP2_MAPPER") { + script "../../../mirdeep2/mapper/main.nf" + config "./nextflow.config" + + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + } + + test("mirdeep2 - mirdeep2 - fa") { + + when { + process { + """ + input[0] = MIRDEEP2_MAPPER.out.mirdeep2_inputs + input[1] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + input[2] = [ + [ id:'hairpin_mature'], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + // Assert .html + { assert path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('') } + ) + } + + } + + test("mirdeep - mirdeep2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = MIRDEEP2_MAPPER.out.mirdeep2_inputs + input[1] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + input[2] = [ + [ id:'hairpin_mature'], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_other_species.fa', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap new file mode 100644 index 00000000000..928d27bc326 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap @@ -0,0 +1,57 @@ +{ + "mirdeep - mirdeep2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_reads", + "single_end": false + }, + [ + "result_test_reads.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "result_test_reads.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "result_test_reads.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" + ], + "outputs": [ + [ + { + "id": "test_reads", + "single_end": false + }, + [ + "result_test_reads.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "result_test_reads.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "result_test_reads.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T04:50:15.746756906" + }, + "mirdeep2 - mirdeep2 - fa": { + "content": [ + [ + "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T15:06:28.029563411" + } +} \ No newline at end of file diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config new file mode 100644 index 00000000000..6a33ae05eed --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } +} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf new file mode 100644 index 00000000000..4fa21d17af3 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf @@ -0,0 +1,40 @@ +include { BOWTIE_BUILD } from '../../../modules/nf-core/bowtie/build/main' +include { SEQKIT_FQ2FA } from '../../../modules/nf-core/seqkit/fq2fa/main' +include { SEQKIT_REPLACE } from '../../../modules/nf-core/seqkit/replace/main' +include { SEQKIT_REPLACE as SEQKIT_REPLACE_GENOME } from '../../../modules/nf-core/seqkit/replace/main' +include { MIRDEEP2_MAPPER } from '../../../modules/nf-core/mirdeep2/mapper/main' +include { MIRDEEP2_MIRDEEP2 } from '../../../modules/nf-core/mirdeep2/mirdeep2/main' + +workflow FASTQ_FIND_MIRNA_MIRDEEP2 { + + take: + ch_reads // channel: [ val(meta), fastq ] + ch_genome_fasta // channel: [ val(meta), genome_fasta ] + ch_mirna_mature_hairpin // channel: [ val(meta), mature_mirna, hairpin_mirna ] + + main: + + ch_versions = Channel.empty() + + SEQKIT_FQ2FA ( ch_reads ) + ch_versions = ch_versions.mix(SEQKIT_FQ2FA.out.versions) + + SEQKIT_REPLACE ( SEQKIT_FQ2FA.out.fasta ) + ch_versions = ch_versions.mix(SEQKIT_REPLACE.out.versions) + + SEQKIT_REPLACE_GENOME ( ch_genome_fasta ) + ch_versions = ch_versions.mix(SEQKIT_REPLACE_GENOME.out.versions) + + BOWTIE_BUILD ( SEQKIT_REPLACE_GENOME.out.fastx ) + ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) + + MIRDEEP2_MAPPER ( SEQKIT_REPLACE.out.fastx, BOWTIE_BUILD.out.index ) + ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions) + + MIRDEEP2_MIRDEEP2 ( MIRDEEP2_MAPPER.out.mirdeep2_inputs, SEQKIT_REPLACE_GENOME.out.fastx, ch_mirna_mature_hairpin ) + ch_versions = ch_versions.mix(MIRDEEP2_MIRDEEP2.out.versions) + + emit: + outputs = MIRDEEP2_MIRDEEP2.out.outputs // channel: [ val(meta), [ bed, csv, html ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml new file mode 100644 index 00000000000..22a475b36f7 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml @@ -0,0 +1,51 @@ +name: "fastq_find_mirna_mirdeep2" +description: | + This subworkflow identifies miRNAs from FASTQ files using miRDeep2. The workflow converts FASTQ to FASTA, processes and replaces any whitespace in sequence IDs, builds a Bowtie index of the genome, and then maps reads using miRDeep2 mapper before identifying known and novel miRNAs. +keywords: + - miRNA + - FASTQ + - FASTA + - Bowtie + - miRDeep2 +components: + - seqkit/fq2fa + - seqkit/replace + - bowtie/build + - mirdeep2/mapper + - mirdeep2/mirdeep2 +input: + - ch_reads: + type: file + description: | + The input channel containing the FASTQ files to process and identify miRNAs. + Structure: [ val(meta), path(fastq) ] + pattern: "*.fastq.gz" + - ch_genome_fasta: + type: file + description: | + The input channel containing the genome FASTA files used to build the Bowtie index. + Structure: [ val(meta), path(fasta) ] + pattern: "*.fa" + - ch_mirna_mature_hairpin: + type: file + description: | + The input channel containing the mature and hairpin miRNA sequences for miRNA identification. + Structure: [ val(meta), path(mature_fasta), path(hairpin_fasta) ] + pattern: "*.fa" +output: + - outputs: + type: file + description: | + The output channel containing the BED, CSV, and HTML files with the identified miRNAs. + Structure: [ val(meta), path(bed), path(csv), path(html) ] + pattern: "*.{bed,csv,html}" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test new file mode 100644 index 00000000000..3bf425fda4c --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_FIND_MIRNA_MIRDEEP2" + script "../main.nf" + workflow "FASTQ_FIND_MIRNA_MIRDEEP2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_find_mirna_mirdeep2" + tag "mirdeep2/mapper" + tag "mirdeep2/mirdeep2" + tag "seqkit/fq2fa" + tag "seqkit/replace" + tag "bowtie/build" + + + test("smrnaseq - fasta - single_end") { + config "./nextflow.config" + + when { + workflow { + """ + input[0] = [ + [ id:'small_Clone1_N1', single_end:false ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) + ] + + input[1] = [ + [ id:'genome' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa', checkIfExists: true) + ] + + input[2] = [ + [ id:'mirna_mature_hairpin'], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.versions).match()}, + // Assert .html + { assert path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('') } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap new file mode 100644 index 00000000000..8008a90164d --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "sarscov2 - bam - single_end": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", + "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" + ], + "mapped_reads": [ + + ], + "mirna_outputs": [ + + ], + "versions": [ + "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", + "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T18:23:58.032563457" + }, + "smrnaseq - fasta - single_end": { + "content": [ + [ + "versions.yml:md5,10138b74aed5b2658c26ddf80ff391d5", + "versions.yml:md5,5db2a49c5d0b56aee7b20885091c7276", + "versions.yml:md5,631c0428c28d5355f0e3e9bd790bd77d", + "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", + "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92", + "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T19:39:14.477668102" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config new file mode 100644 index 00000000000..665b60611d6 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config @@ -0,0 +1,16 @@ +process { + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } + + withName: 'SEQKIT_REPLACE' { + ext.args = "-p '\s.+'" + ext.suffix = "fasta" + } + + withName: 'SEQKIT_REPLACE_GENOME' { + ext.args = "-p '\s.+'" + ext.suffix = "fasta" + } + +} From 86a63076f64596212a61c87e51f46778bea9d6c7 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:38:31 +0000 Subject: [PATCH 2/9] fix prettier --- modules/nf-core/mirdeep2/mapper/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml index 9f798e93970..a830edb7926 100644 --- a/modules/nf-core/mirdeep2/mapper/meta.yml +++ b/modules/nf-core/mirdeep2/mapper/meta.yml @@ -41,7 +41,7 @@ output: pattern: "*" - versions: type: file - description: File containing software versions for tracking. + description: File containing software versions for tracking. pattern: "versions.yml" authors: - "@atrigila" From 7c12d11507d482bb173cd916ab614f0f14a29a60 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:49:26 +0000 Subject: [PATCH 3/9] fix linting --- modules/nf-core/mirdeep2/mapper/meta.yml | 60 +++++++++------ modules/nf-core/mirdeep2/mirdeep2/meta.yml | 85 ++++++++++++---------- 2 files changed, 82 insertions(+), 63 deletions(-) diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml index a830edb7926..7140e0a6dd4 100644 --- a/modules/nf-core/mirdeep2/mapper/meta.yml +++ b/modules/nf-core/mirdeep2/mapper/meta.yml @@ -14,35 +14,47 @@ tools: tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" doi: "10.1093/nar/gkn491" licence: ["GPL V3"] + identifier: biotools:mirdeep2 input: - - meta: - type: map - description: Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` - - reads: - type: file - description: File containing the raw sequencing reads that need to be collapsed and mapped to a reference genome. - pattern: "*.fa" - - meta2: - type: map - description: Groovy Map containing information about the genome index. - - index: - type: file - description: Path to the genome index file used for mapping the reads to the genome. - pattern: "*" - + - - meta: + type: map + description: Groovy Map containing sample information, e.g. `[ id:'sample1', + single_end:false ]` + - reads: + type: file + description: File containing the raw sequencing reads that need to be collapsed + and mapped to a reference genome. + pattern: "*.fa" + - - meta2: + type: map + description: Groovy Map containing information about the genome index. + - index: + type: file + description: Path to the genome index file used for mapping the reads to the + genome. + pattern: "*" output: - - meta: - type: map - description: Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - mirdeep2_inputs: - type: file - description: Outputs for miRDeep2 including the collapsed reads file (FASTA) and the mapped reads in ARF format. - pattern: "*" + - meta: + type: map + description: Groovy Map containing sample information e.g. `[ id:'sample1', + single_end:false ]` + - "*.fa": + type: file + description: Outputs for miRDeep2 including the collapsed reads file (FASTA) + and the mapped reads in ARF format. + pattern: "*" + - "*.arf": + type: file + description: Outputs for miRDeep2 including the collapsed reads file (FASTA) + and the mapped reads in ARF format. + pattern: "*" - versions: - type: file - description: File containing software versions for tracking. - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions for tracking. + pattern: "versions.yml" authors: - "@atrigila" maintainers: diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml index 49e915dc814..eaa8e4d40ca 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/meta.yml +++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml @@ -15,50 +15,57 @@ tools: tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" doi: "10.1093/nar/gkn491" licence: ["GPL V3"] + identifier: biotools:mirdeep2 input: - - meta: - type: map - description: Groovy Map containing sample information, e.g. `[ id:'sample1', single_end:false ]` - - processed_reads: - type: file - description: FASTA file containing the processed sequencing reads. - pattern: "*.fa" - - genome_mappings: - type: file - description: ARF format file with mapped reads to the genome. - pattern: "*.arf" - - meta2: - type: map - description: Groovy Map for genome FASTA file metadata, e.g. `[ id:'genome']` - - fasta: - type: file - description: FASTA file of the corresponding genome. - pattern: "*.fa" - - meta3: - type: map - description: Groovy Map for miRNA metadata, e.g. `[ id:'mirbase', single_end:false ]` - - mature: - type: file - description: FASTA file containing known mature miRNAs of the species being analyzed. - pattern: "*.fa" - - hairpin: - type: file - description: FASTA file containing hairpin sequences (miRNA precursors). - pattern: "*.fa" - + - - meta: + type: map + description: Groovy Map containing sample information, e.g. `[ id:'sample1', + single_end:false ]` + - processed_reads: + type: file + description: FASTA file containing the processed sequencing reads. + pattern: "*.fa" + - genome_mappings: + type: file + description: ARF format file with mapped reads to the genome. + pattern: "*.arf" + - - meta2: + type: map + description: Groovy Map for genome FASTA file metadata, e.g. `[ id:'genome']` + - fasta: + type: file + description: FASTA file of the corresponding genome. + pattern: "*.fa" + - - meta3: + type: map + description: Groovy Map for miRNA metadata, e.g. `[ id:'mirbase', single_end:false + ]` + - mature: + type: file + description: FASTA file containing known mature miRNAs of the species being + analyzed. + pattern: "*.fa" + - hairpin: + type: file + description: FASTA file containing hairpin sequences (miRNA precursors). + pattern: "*.fa" output: - - meta: - type: map - description: Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - outputs: - type: file - description: Output files, including BED, CSV, and HTML results files with an overview of detected miRNAs. - pattern: "result*.{bed,csv,html}" + - meta: + type: map + description: Groovy Map containing sample information e.g. `[ id:'sample1', + single_end:false ]` + - result*.{bed,csv,html}: + type: file + description: Output files, including BED, CSV, and HTML results files with an + overview of detected miRNAs. + pattern: "result*.{bed,csv,html}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@atrigila" maintainers: From bad0b360d41c370bce7b5c1ae5ff283ca3e12d53 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 20 Sep 2024 21:18:01 +0000 Subject: [PATCH 4/9] reformat subworkflow structure --- modules/nf-core/mirdeep2/mapper/main.nf | 2 +- .../mirdeep2/mapper/tests/main.nf.test | 10 ++--- .../mirdeep2/mapper/tests/main.nf.test.snap | 4 +- .../mirdeep2/mirdeep2/tests/main.nf.test | 4 +- .../mirdeep2/mirdeep2/tests/main.nf.test.snap | 4 +- .../nf-core/fastq_find_mirna_mirdeep2/main.nf | 13 ++----- .../tests/main.nf.test | 34 ++++++++++++++--- .../tests/main.nf.test.snap | 37 +------------------ .../tests/nextflow.config | 5 --- 9 files changed, 46 insertions(+), 67 deletions(-) diff --git a/modules/nf-core/mirdeep2/mapper/main.nf b/modules/nf-core/mirdeep2/mapper/main.nf index 3d53b4e8d28..d52820a362b 100644 --- a/modules/nf-core/mirdeep2/mapper/main.nf +++ b/modules/nf-core/mirdeep2/mapper/main.nf @@ -12,7 +12,7 @@ process MIRDEEP2_MAPPER { tuple val(meta2), path(index, stageAs: '*') output: - tuple val(meta), path('*.fa'), path('*.arf'), emit: mirdeep2_inputs + tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test index 0924f77124b..62e3e615abc 100644 --- a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test @@ -74,13 +74,13 @@ nextflow_process { // md5sum not stable - IDs change while sequences are the same // Assert TCACCGGGGGTACATCAGCTAA occurs once - { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 }, + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 }, // Assert seq_347479_x287 occurs once - { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 }, + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 }, // Assert that specific content occurs 4 times - { assert file(process.out.mirdeep2_inputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 } + { assert file(process.out.outputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 } ) } @@ -103,10 +103,10 @@ nextflow_process { { assert process.success }, // Assert reads occurs once - { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 }, + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 }, // Assert ID occurs once - { assert file(process.out.mirdeep2_inputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 } + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 } ) } diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap index 52d6933a67c..4c3697d976e 100644 --- a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap @@ -15,7 +15,7 @@ "1": [ "versions.yml:md5,33c794292d6772d67fa8001439394614" ], - "mirdeep2_inputs": [ + "outputs": [ [ { "id": "test_reads", @@ -34,7 +34,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-17T17:42:03.429229932" + "timestamp": "2024-09-20T20:58:19.544297445" }, "mirdeep2 - mapper - fasta celegans": { "content": [ diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test index 3d3a769d318..e423034ab0c 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test @@ -47,7 +47,7 @@ nextflow_process { when { process { """ - input[0] = MIRDEEP2_MAPPER.out.mirdeep2_inputs + input[0] = MIRDEEP2_MAPPER.out.outputs input[1] = [ [ id:'genome_cel_cluster' ], // meta map file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) @@ -79,7 +79,7 @@ nextflow_process { when { process { """ - input[0] = MIRDEEP2_MAPPER.out.mirdeep2_inputs + input[0] = MIRDEEP2_MAPPER.out.outputs input[1] = [ [ id:'genome_cel_cluster' ], // meta map file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap index 928d27bc326..e825335f5b9 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap @@ -40,7 +40,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-17T04:50:15.746756906" + "timestamp": "2024-09-20T21:04:53.304188615" }, "mirdeep2 - mirdeep2 - fa": { "content": [ @@ -52,6 +52,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-17T15:06:28.029563411" + "timestamp": "2024-09-20T21:04:34.236180659" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf index 4fa21d17af3..f8c3da93113 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf @@ -1,7 +1,5 @@ -include { BOWTIE_BUILD } from '../../../modules/nf-core/bowtie/build/main' include { SEQKIT_FQ2FA } from '../../../modules/nf-core/seqkit/fq2fa/main' include { SEQKIT_REPLACE } from '../../../modules/nf-core/seqkit/replace/main' -include { SEQKIT_REPLACE as SEQKIT_REPLACE_GENOME } from '../../../modules/nf-core/seqkit/replace/main' include { MIRDEEP2_MAPPER } from '../../../modules/nf-core/mirdeep2/mapper/main' include { MIRDEEP2_MIRDEEP2 } from '../../../modules/nf-core/mirdeep2/mirdeep2/main' @@ -10,6 +8,7 @@ workflow FASTQ_FIND_MIRNA_MIRDEEP2 { take: ch_reads // channel: [ val(meta), fastq ] ch_genome_fasta // channel: [ val(meta), genome_fasta ] + ch_bowtie_index // channel: [ val(meta), index ] ch_mirna_mature_hairpin // channel: [ val(meta), mature_mirna, hairpin_mirna ] main: @@ -22,16 +21,10 @@ workflow FASTQ_FIND_MIRNA_MIRDEEP2 { SEQKIT_REPLACE ( SEQKIT_FQ2FA.out.fasta ) ch_versions = ch_versions.mix(SEQKIT_REPLACE.out.versions) - SEQKIT_REPLACE_GENOME ( ch_genome_fasta ) - ch_versions = ch_versions.mix(SEQKIT_REPLACE_GENOME.out.versions) - - BOWTIE_BUILD ( SEQKIT_REPLACE_GENOME.out.fastx ) - ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) - - MIRDEEP2_MAPPER ( SEQKIT_REPLACE.out.fastx, BOWTIE_BUILD.out.index ) + MIRDEEP2_MAPPER ( SEQKIT_REPLACE.out.fastx, ch_bowtie_index ) ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions) - MIRDEEP2_MIRDEEP2 ( MIRDEEP2_MAPPER.out.mirdeep2_inputs, SEQKIT_REPLACE_GENOME.out.fastx, ch_mirna_mature_hairpin ) + MIRDEEP2_MIRDEEP2 ( MIRDEEP2_MAPPER.out.outputs, ch_genome_fasta, ch_mirna_mature_hairpin ) ch_versions = ch_versions.mix(MIRDEEP2_MIRDEEP2.out.versions) emit: diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test index 3bf425fda4c..9992ef0cc12 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test @@ -17,6 +17,31 @@ nextflow_workflow { test("smrnaseq - fasta - single_end") { config "./nextflow.config" + setup { + run("SEQKIT_REPLACE") { + script "modules/nf-core/seqkit/replace/main.nf" + config "./nextflow.config" + + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa', checkIfExists: true) + ] + """ + } + } + + run("BOWTIE_BUILD") { + script "modules/nf-core/bowtie/build/main.nf" + process { + """ + input[0] = SEQKIT_REPLACE.out.fastx + """ + } + } + } + when { workflow { """ @@ -25,12 +50,11 @@ nextflow_workflow { file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) ] - input[1] = [ - [ id:'genome' ], // meta map - file('https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa', checkIfExists: true) - ] + input[1] = SEQKIT_REPLACE.out.fastx + + input[2] = BOWTIE_BUILD.out.index - input[2] = [ + input[3] = [ [ id:'mirna_mature_hairpin'], // meta map file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa', checkIfExists: true), file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true) diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap index 8008a90164d..b076f0b201c 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap @@ -1,50 +1,17 @@ { - "sarscov2 - bam - single_end": { - "content": [ - { - "0": [ - - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", - "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" - ], - "mapped_reads": [ - - ], - "mirna_outputs": [ - - ], - "versions": [ - "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", - "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-17T18:23:58.032563457" - }, "smrnaseq - fasta - single_end": { "content": [ [ "versions.yml:md5,10138b74aed5b2658c26ddf80ff391d5", - "versions.yml:md5,5db2a49c5d0b56aee7b20885091c7276", "versions.yml:md5,631c0428c28d5355f0e3e9bd790bd77d", "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", - "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92", - "versions.yml:md5,d472e8b33e4a8a71257c5216ad452b61" + "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-17T19:39:14.477668102" + "timestamp": "2024-09-20T21:00:27.799153602" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config index 665b60611d6..ec097561e60 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config @@ -8,9 +8,4 @@ process { ext.suffix = "fasta" } - withName: 'SEQKIT_REPLACE_GENOME' { - ext.args = "-p '\s.+'" - ext.suffix = "fasta" - } - } From 788a2dac239190129ced2be19dd883c5f890809f Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 20 Sep 2024 22:14:15 +0000 Subject: [PATCH 5/9] fix linting --- modules/nf-core/mirdeep2/mapper/meta.yml | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml index 7140e0a6dd4..5844344cf97 100644 --- a/modules/nf-core/mirdeep2/mapper/meta.yml +++ b/modules/nf-core/mirdeep2/mapper/meta.yml @@ -35,21 +35,10 @@ input: genome. pattern: "*" output: - - mirdeep2_inputs: - - meta: - type: map - description: Groovy Map containing sample information e.g. `[ id:'sample1', - single_end:false ]` - - "*.fa": - type: file - description: Outputs for miRDeep2 including the collapsed reads file (FASTA) - and the mapped reads in ARF format. - pattern: "*" - - "*.arf": - type: file - description: Outputs for miRDeep2 including the collapsed reads file (FASTA) - and the mapped reads in ARF format. - pattern: "*" + - outputs: + - meta: {} + - "*.fa": {} + - "*.arf": {} - versions: - versions.yml: type: file From af708109dd4243eace20bec2b20e1d15f7caacbd Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:05:37 +0000 Subject: [PATCH 6/9] allow paths to files or replace with 'none' --- modules/nf-core/mirdeep2/mirdeep2/main.nf | 13 +++++++++---- .../nf-core/mirdeep2/mirdeep2/tests/main.nf.test | 6 ++++-- .../fastq_find_mirna_mirdeep2/tests/main.nf.test | 3 ++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/mirdeep2/mirdeep2/main.nf b/modules/nf-core/mirdeep2/mirdeep2/main.nf index d2befc6a040..66c859683ac 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/main.nf +++ b/modules/nf-core/mirdeep2/mirdeep2/main.nf @@ -10,7 +10,7 @@ process MIRDEEP2_MIRDEEP2 { input: tuple val(meta), path(processed_reads), path(genome_mappings) tuple val(meta2), path(fasta) - tuple val(meta3), path(mature), path(hairpin) + tuple val(meta3), path(mature), path(hairpin), path(mature_other_species) output: tuple val(meta), path("result*.{bed,csv,html}") , emit: outputs @@ -23,14 +23,19 @@ process MIRDEEP2_MIRDEEP2 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = '2.0.1' + def mature_species = mature ? "${mature}" : "none" + def mature_other = mature_other_species ? "${mature_other_species}": "none" + def precursors = hairpin ? "${hairpin}" : "none" + """ miRDeep2.pl \\ $processed_reads \\ $fasta \\ $genome_mappings \\ - $mature \\ - none \\ - $hairpin + $mature_species \\ + $mature_other \\ + $precursors \\ + $args mv result_*.bed result_${prefix}.bed mv result_*.csv result_${prefix}.csv diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test index e423034ab0c..a0ea2778c39 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test @@ -55,7 +55,8 @@ nextflow_process { input[2] = [ [ id:'hairpin_mature'], // meta map file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), - file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true) + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true), + [] ] """ } @@ -87,7 +88,8 @@ nextflow_process { input[2] = [ [ id:'hairpin_mature'], // meta map file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), - file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_other_species.fa', checkIfExists: true) + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_other_species.fa', checkIfExists: true), + [] ] """ } diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test index 9992ef0cc12..742e4f85d5b 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test @@ -57,7 +57,8 @@ nextflow_workflow { input[3] = [ [ id:'mirna_mature_hairpin'], // meta map file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa', checkIfExists: true), - file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true) + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true), + [] ] """ } From 4aac2329c6e2e93ae20200ab34e8ac90b68474f4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:17:24 +0000 Subject: [PATCH 7/9] fix linting --- modules/nf-core/mirdeep2/mirdeep2/meta.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml index eaa8e4d40ca..325a31a3d88 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/meta.yml +++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml @@ -50,6 +50,7 @@ input: type: file description: FASTA file containing hairpin sequences (miRNA precursors). pattern: "*.fa" + - mature_other_species: {} output: - outputs: - meta: From 8041439332f707cf6a50d4dad2d2b9e2071a4938 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:11:55 +0000 Subject: [PATCH 8/9] capture unstable outputs in snapshot --- modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test | 6 +++++- modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap | 7 +++++-- .../nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test | 6 +++++- .../fastq_find_mirna_mirdeep2/tests/main.nf.test.snap | 7 +++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test index a0ea2778c39..b7b73ec123d 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test @@ -65,7 +65,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.versions).match() }, + { assert snapshot(process.out.versions, + path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains(''), + process.out.outputs.get(0).get(1)[0], + path(process.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score') + ).match() }, // Assert .html { assert path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('') } ) diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap index e825335f5b9..f8ffcf019db 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap @@ -46,12 +46,15 @@ "content": [ [ "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" - ] + ], + true, + "result_test_reads.bed:md5,ba5ef5782e40d7219ca064dd68865d74", + true ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T21:04:34.236180659" + "timestamp": "2024-09-23T15:08:50.660562955" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test index 742e4f85d5b..13c10e52658 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test @@ -67,7 +67,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out.versions).match()}, + { assert snapshot(workflow.out.versions, + path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains(''), + workflow.out.outputs.get(0).get(1)[0], + path(workflow.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score') + ).match()}, // Assert .html { assert path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('') } ) diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap index b076f0b201c..c48df3d7ed9 100644 --- a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap @@ -6,12 +6,15 @@ "versions.yml:md5,631c0428c28d5355f0e3e9bd790bd77d", "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92" - ] + ], + true, + "result_small_Clone1_N1.bed:md5,98a74ac6dd16ee876e9a3f54d2695c88", + true ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T21:00:27.799153602" + "timestamp": "2024-09-23T14:56:03.274059331" } } \ No newline at end of file From 487a3bc68b49b585d4c667162137984c781ca709 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:20:07 +0000 Subject: [PATCH 9/9] update meta --- modules/nf-core/mirdeep2/mirdeep2/meta.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml index 325a31a3d88..adf1410195b 100644 --- a/modules/nf-core/mirdeep2/mirdeep2/meta.yml +++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml @@ -50,7 +50,10 @@ input: type: file description: FASTA file containing hairpin sequences (miRNA precursors). pattern: "*.fa" - - mature_other_species: {} + - mature_other_species: + type: file + description: FASTA file containing known mature miRNAs of other species. + pattern: "*.fa" output: - outputs: - meta: