Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nf-core module and subworkflow mirdeep2 #6662

Merged
merged 13 commits into from
Sep 23, 2024
7 changes: 7 additions & 0 deletions modules/nf-core/mirdeep2/mapper/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the mirdeep2/mapper module.
# Channels are listed with conda-forge first, then bioconda.
channels:
- conda-forge
- bioconda
# Single dependency: miRDeep2 from bioconda, pinned to an exact version.
dependencies:
- "bioconda::mirdeep2=2.0.1.2"
53 changes: 53 additions & 0 deletions modules/nf-core/mirdeep2/mapper/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
 * MIRDEEP2_MAPPER
 *
 * Runs miRDeep2's `mapper.pl` on a reads file against a staged index and
 * writes two files: the FASTA given to `-s` and the ARF given to `-t`.
 *
 * Inputs:
 *   meta,  reads - sample meta map and the reads file passed to mapper.pl
 *   meta2, index - index meta map and the staged index directory; the index
 *                  prefix handed to `-p` is `${index}/${meta2.id}`
 *
 * Outputs:
 *   outputs  - tuple of meta, the `*.fa` file(s) and the `*.arf` file(s)
 *   versions - versions.yml recording the reported miRDeep2 version
 *
 * Extra mapper.pl options are supplied through `task.ext.args`; the output
 * file prefix defaults to `meta.id` and can be overridden via `task.ext.prefix`.
 */
process MIRDEEP2_MAPPER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
        'biocontainers/mirdeep2:2.0.1.2--0' }"

    input:
    tuple val(meta), path(reads)
    tuple val(meta2), path(index, stageAs: '*')

    output:
    tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded here rather than queried from the tool.
    // NOTE(review): the conda/container pin is 2.0.1.2 while this reports 2.0.1 -
    // confirm which is intended. Changing it alters versions.yml and therefore the
    // checksum stored in tests/main.nf.test.snap.
    def VERSION = '2.0.1'

    """
    mapper.pl \\
        ${reads} \\
        $args \\
        -p ${index}/${meta2.id} \\
        -s ${prefix}_collapsed.fa \\
        -t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.0.1'
    // NOTE(review): the stub file names differ from the real run's
    // `${prefix}_collapsed.fa` / `${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf`
    // (and the .arf name has no separator after the prefix). They are kept as-is
    // because the committed test snapshot records these exact names; rename only
    // together with a snapshot refresh.
    """
    touch ${prefix}.fa
    touch ${prefix}reads_vs_refdb.arf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/nf-core/mirdeep2/mapper/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Module metadata for mirdeep2/mapper: describes the tool, the two input
# channels and the two output channels declared in main.nf.
name: "mirdeep2_mapper"
description: |
  miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery.
keywords:
  - mirdeep2
  - mapper
  - RNA sequencing
tools:
  - "mirdeep2":
      description: |
        miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis.
      homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
      documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
      tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2"
      doi: "10.1093/nar/gkn491"
      licence: ["GPL V3"]
      identifier: biotools:mirdeep2

input:
  - - meta:
        type: map
        description: Groovy Map containing sample information, e.g. `[ id:'sample1',
          single_end:false ]`
    - reads:
        type: file
        description: File containing the raw sequencing reads that need to be collapsed
          and mapped to a reference genome.
        pattern: "*.fa"
  - - meta2:
        type: map
        description: Groovy Map containing information about the genome index. Its
          `id` is used as the index prefix inside the index directory.
    # The process addresses the index as `${index}/${meta2.id}`, i.e. a directory.
    - index:
        type: directory
        description: Directory containing the genome index used for mapping the reads
          to the genome.
        pattern: "*"
output:
  - outputs:
      - meta:
          type: map
          description: Groovy Map containing sample information, e.g. `[ id:'sample1',
            single_end:false ]`
      - "*.fa":
          type: file
          description: FASTA file of collapsed reads written by `mapper.pl` (`-s`).
          pattern: "*.fa"
      - "*.arf":
          type: file
          description: ARF file with the read-to-genome mappings written by `mapper.pl`
            (`-t`).
          pattern: "*.arf"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions for tracking.
          pattern: "versions.yml"
authors:
  - "@atrigila"
maintainers:
  - "@atrigila"
141 changes: 141 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@

// nf-test suite for MIRDEEP2_MAPPER.
// Setup builds a bowtie index from the miRDeep2 tutorial genome and converts a
// smrnaseq FASTQ into FASTA (SEQKIT_FQ2FA followed by SEQKIT_REPLACE); the tests
// then run the mapper on tutorial reads, on the converted reads, and in stub mode.
nextflow_process {

    name "Test Process MIRDEEP2_MAPPER"
    script "../main.nf"
    process "MIRDEEP2_MAPPER"

    tag "modules"
    tag "modules_nfcore"
    tag "mirdeep2"
    tag "bowtie/build"
    tag "mirdeep2/mapper"
    tag "seqkit/fq2fa"
    tag "seqkit/replace"

    setup {
        // Index shared by every test below.
        run("BOWTIE_BUILD") {
            script "../../../bowtie/build/main.nf"
            process {
                """
                input[0] = [
                    [ id:'genome_cel_cluster' ], // meta map
                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
                ]
                """
            }
        }

        // FASTQ -> FASTA conversion feeding SEQKIT_REPLACE.
        run("SEQKIT_FQ2FA") {
            script "../../../seqkit/fq2fa/main.nf"
            process {
                """
                input[0] = [
                    [ id:'small_Clone1_N1' ], // meta map
                    file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true)
                ]
                """
            }
        }

        // Rewrites the FASTA produced above using the options in nextflow.config.
        run("SEQKIT_REPLACE") {
            script "../../../seqkit/replace/main.nf"
            config "./nextflow.config"
            process {
                """
                input[0] = SEQKIT_FQ2FA.out.fasta
                """
            }
        }
    }

    test("mirdeep2 - mapper - fasta celegans") {
        config "./nextflow.config"

        when {
            process {
                """
                input[0] = [
                    [ id:'test_reads', single_end:false ], // meta map
                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
                ]
                input[1] = BOWTIE_BUILD.out.index
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match() },

                // Output checksums are not stable (IDs change while the sequences stay
                // the same), so the files are checked by content instead.

                // The collapsed FASTA holds this sequence on exactly one line
                { assert file(process.out.outputs[0][1]).readLines().count { line -> line.contains("TCACCGGGGGTACATCAGCTAA") } == 1 },

                // The collapsed FASTA holds this read ID on exactly one line
                { assert file(process.out.outputs[0][1]).readLines().count { line -> line.contains("seq_347479_x287") } == 1 },

                // The ARF file holds this mapping record on exactly four lines
                { assert file(process.out.outputs[0][2]).readLines().count { line -> line.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") } == 4 }
            )
        }
    }

    test("mirdeep2 - mapper - fasta smrnaseq") {
        config "./nextflow.config"

        when {
            process {
                """
                input[0] = SEQKIT_REPLACE.out.fastx
                input[1] = BOWTIE_BUILD.out.index
                """
            }
        }

        then {
            assertAll(
                { assert process.success },

                // The collapsed FASTA holds this sequence on exactly one line
                { assert file(process.out.outputs[0][1]).readLines().count { line -> line.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") } == 1 },

                // The collapsed FASTA holds this read ID on exactly one line
                { assert file(process.out.outputs[0][1]).readLines().count { line -> line.contains("seq_996152_x1") } == 1 }
            )
        }
    }

    // The test name is a snapshot key in main.nf.test.snap; keep it in sync.
    test("mirdeep2 - fasta - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test_reads', single_end:false ], // meta map
                    file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
                ]
                input[1] = BOWTIE_BUILD.out.index
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
51 changes: 51 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"mirdeep2 - fasta - stub": {
"content": [
{
"0": [
[
{
"id": "test_reads",
"single_end": false
},
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,33c794292d6772d67fa8001439394614"
],
"outputs": [
[
{
"id": "test_reads",
"single_end": false
},
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,33c794292d6772d67fa8001439394614"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T20:58:19.544297445"
},
"mirdeep2 - mapper - fasta celegans": {
"content": [
[
"versions.yml:md5,33c794292d6772d67fa8001439394614"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-17T17:41:05.101661825"
}
}
11 changes: 11 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Process options used only by the mirdeep2/mapper nf-test suite.
process {

    withName: 'MIRDEEP2_MAPPER' {
        // Forwarded verbatim to mapper.pl through task.ext.args.
        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v"
    }

    withName: 'SEQKIT_REPLACE' {
        // The backslash is doubled because this is a double-quoted Groovy string:
        // a single `\s` is consumed as a string escape before the value ever
        // reaches the shell, so seqkit would not receive the regex `\s.+`.
        // Presumably the pattern is meant to match everything from the first
        // whitespace onwards in the read name - confirm against the seqkit module.
        ext.args   = "-p '\\s.+'"
        ext.suffix = "fasta"
    }

}
7 changes: 7 additions & 0 deletions modules/nf-core/mirdeep2/mirdeep2/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the mirdeep2/mirdeep2 module.
# Channels are listed with conda-forge first, then bioconda.
channels:
- conda-forge
- bioconda
# Single dependency: miRDeep2 from bioconda, pinned to an exact version.
dependencies:
- "bioconda::mirdeep2=2.0.1.2"
64 changes: 64 additions & 0 deletions modules/nf-core/mirdeep2/mirdeep2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
 * MIRDEEP2_MIRDEEP2
 *
 * Runs `miRDeep2.pl` and renames its `result_*` bed/csv/html reports so that
 * they carry the sample prefix.
 *
 * Inputs:
 *   meta,  processed_reads, genome_mappings - sample meta map plus the reads
 *          file and the mappings file passed as 1st and 3rd positional arguments
 *   meta2, fasta - meta map and the FASTA passed as 2nd positional argument
 *   meta3, mature, hairpin, mature_other_species - optional reference files;
 *          each one that evaluates to false (e.g. an empty list) is replaced
 *          by the literal `none` on the command line
 *
 * Outputs:
 *   outputs  - tuple of meta and the files matching `result*.{bed,csv,html}`
 *   versions - versions.yml recording the reported miRDeep2 version
 *
 * Extra miRDeep2.pl options are supplied through `task.ext.args`; the report
 * prefix defaults to `meta.id` and can be overridden via `task.ext.prefix`.
 */
process MIRDEEP2_MIRDEEP2 {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
        'biocontainers/mirdeep2:2.0.1.2--0' }"

    input:
    tuple val(meta), path(processed_reads), path(genome_mappings)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(mature), path(hairpin), path(mature_other_species)

    output:
    tuple val(meta), path("result*.{bed,csv,html}"), emit: outputs
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded here rather than queried from the tool.
    // NOTE(review): the conda/container pin is 2.0.1.2 while this reports 2.0.1 -
    // confirm which is intended.
    def VERSION = '2.0.1'
    // Missing optional references are passed to miRDeep2.pl as the word "none".
    def mature_species = mature               ? "${mature}"               : "none"
    def mature_other   = mature_other_species ? "${mature_other_species}" : "none"
    def precursors     = hairpin              ? "${hairpin}"              : "none"

    """
    miRDeep2.pl \\
        $processed_reads \\
        $fasta \\
        $genome_mappings \\
        $mature_species \\
        $mature_other \\
        $precursors \\
        $args

    mv result_*.bed  result_${prefix}.bed
    mv result_*.csv  result_${prefix}.csv
    mv result_*.html result_${prefix}.html

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.0.1'
    // Creates empty placeholders with the same names the real run renames to.
    """
    touch result_${prefix}.html
    touch result_${prefix}.bed
    touch result_${prefix}.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """
}
Loading
Loading