Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update kalign module to work with compressed files. #5277

Merged
merged 7 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion modules/nf-core/kalign/align/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ channels:
- bioconda
- defaults
dependencies:
- bioconda::kalign3=3.3.5
- bioconda::kalign3=3.4.0
- conda-forge::pigz=2.8
20 changes: 12 additions & 8 deletions modules/nf-core/kalign/align/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,47 @@ process KALIGN_ALIGN {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kalign3:3.3.5--hdbdd923_0':
'biocontainers/kalign3:3.3.5--hdbdd923_0' }"
'https://depot.galaxyproject.org/singularity/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0':
'biocontainers/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0' }"

input:
tuple val(meta), path(fasta)
val(compress)

output:
tuple val(meta), path("*.aln"), emit: alignment
path "versions.yml" , emit: versions
tuple val(meta), path("*.aln{.gz,}"), emit: alignment
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln"
"""
unpigz -cdf $fasta | \\
kalign \\
$args \\
-i $fasta \\
-o ${prefix}.aln
-o ${write_output}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
kalign : \$(echo \$(kalign -v) | sed 's/kalign //g' )
kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' )
pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.aln
touch ${prefix}.aln${compress ? '.gz' : ''}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' )
pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
END_VERSIONS
"""
}
11 changes: 7 additions & 4 deletions modules/nf-core/kalign/align/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ input:
e.g. `[ id:'test']`
- fasta:
type: file
description: Input sequences in FASTA format
pattern: "*.{fa,fasta}"
description: Input sequences in FASTA format. May be gzipped or uncompressed.
pattern: "*.{fa,fasta}{.gz,}"
- compress:
type: boolean
description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded.
output:
- meta:
type: map
Expand All @@ -30,8 +33,8 @@ output:
e.g. `[ id:'test']`
- alignment:
type: file
description: Alignment file.
pattern: "*.{aln}"
description: Alignment file. May be gzipped or uncompressed, depending on whether `compress` is set to `true` or `false`.
pattern: "*.{aln}{.gz,}"
- versions:
type: file
description: File containing software versions
Expand Down
29 changes: 25 additions & 4 deletions modules/nf-core/kalign/align/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,45 @@ nextflow_process {
tag "kalign"
tag "kalign/align"

test("sarscov2 - fasta") {
test("sarscov2 - fasta - uncompressed") {

when {
process {
"""
input[0] = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
]
input[1] = false
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.alignment).match("alignment")},
{ assert snapshot(process.out.versions).match("versions") }
{ assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - uncompressed")},
)
}
}
}

test("sarscov2 - fasta - compressed") {

when {
process {
"""
input[0] = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
]
input[1] = true
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - compressed")},
)
}
}
}
65 changes: 51 additions & 14 deletions modules/nf-core/kalign/align/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading