Skip to content

Commit

Permalink
Add UPP (#6546)
Browse files Browse the repository at this point in the history
* Add UPP

* fix lint

* Update UPP

* fix prettier

* Update modules/nf-core/upp/align/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* Update modules/nf-core/upp/align/tests/main.nf.test

Co-authored-by: Simon Pearce <[email protected]>

* fixes

* update

* update tests

* update

* update

* Update modules/nf-core/upp/align/main.nf

Co-authored-by: Simon Pearce <[email protected]>

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
luisas and SPPearce authored Sep 17, 2024
1 parent 7b9ce4b commit 3be751e
Show file tree
Hide file tree
Showing 7 changed files with 340 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/nf-core/upp/align/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the upp/align module.
# Channel order matters to conda: conda-forge is listed before bioconda.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Versions are pinned so the environment is reproducible.
  # NOTE(review): sepp is presumably the package that ships run_upp.py,
  # which main.nf invokes - confirm against the bioconda recipe.
  - bioconda::sepp=4.5.5
  # pigz is used by main.nf to compress the alignment when requested.
  - conda-forge::pigz=2.8
71 changes: 71 additions & 0 deletions modules/nf-core/upp/align/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
 * UPP_ALIGN
 *
 * Runs `run_upp.py` on a FASTA file of sequences and publishes the resulting
 * alignment as `<prefix>.aln`, or `<prefix>.aln.gz` when `compress` is true.
 *
 * Inputs:
 *   meta, fasta  - sample metadata map and the sequences to align
 *   meta2, tree  - metadata map and guide tree (see NOTE in the script block)
 *   compress     - boolean; when true the alignment is compressed with pigz
 *
 * Outputs:
 *   alignment    - tuple of meta and the (optionally gzipped) alignment
 *   versions     - versions.yml with the upp and pigz versions
 */
process UPP_ALIGN {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://community.wave.seqera.io/library/sepp_pigz:d72591720d0277b1':
        'community.wave.seqera.io/library/sepp_pigz:ea6dbc7704a2e251' }"

    input:
    tuple val(meta) , path(fasta)
    tuple val(meta2), path(tree)
    val(compress)

    output:
    tuple val(meta), path("*.aln{.gz,}"), emit: alignment
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE(review): the `tree` input is staged but NOT forwarded to run_upp.py.
    // The previous code built an unused `tree_args = "-t $tree"` string that was
    // never interpolated into the command; that dead local has been removed.
    // Before wiring `-t` in, confirm against the UPP documentation whether a
    // backbone tree can be supplied without a backbone alignment (`-a`).
    //
    // The CONDA_PREFIX / PASTA_TOOLS_DEVDIR exports below are only applied when
    // running under singularity, pointing UPP at /opt/conda inside the image.
    """
    if [ "$workflow.containerEngine" = 'singularity' ]; then
        export CONDA_PREFIX="/opt/conda/"
        export PASTA_TOOLS_DEVDIR="/opt/conda/bin/"
    fi

    run_upp.py \\
        $args \\
        -x $task.cpus \\
        -s ${fasta} \\
        -d . \\
        -o ${prefix} \\
        -p ./upp-temporary

    mv ${prefix}_alignment.fasta ${prefix}.aln

    if ${compress}; then
        pigz -p ${task.cpus} ${prefix}.aln
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        upp: \$(run_upp.py -v | grep "run_upp" | cut -f2 -d" ")
        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Creates an empty placeholder with the same file name the real run would
    // produce, so the output glob matches in both compress modes.
    """
    if [ "$compress" = true ]; then
        echo | gzip > "${prefix}.aln.gz"
    else
        touch "${prefix}.aln"
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        upp: \$(run_upp.py -v | grep "run_upp" | cut -f2 -d" ")
        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
    END_VERSIONS
    """
}
57 changes: 57 additions & 0 deletions modules/nf-core/upp/align/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Module metadata for upp/align: describes the tool, its inputs and outputs.
name: "upp_align"
# UPP is a multiple *sequence* aligner; the module takes FASTA sequences and
# emits a FASTA alignment, so the description refers to sequences.
description: Aligns sequences using UPP
keywords:
  - alignment
  - MSA
  - genomics
  - structure
tools:
  - "upp":
      description: "SATe-enabled phylogenetic placement"
      homepage: "https://github.com/smirarab/sepp/tree/master"
      documentation: "https://github.com/smirarab/sepp/blob/master/README.UPP.md"
      tool_dev_url: "https://github.com/smirarab/sepp/tree/master"
      doi: "10.1093/bioinformatics/btad007"
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test']`
  - fasta:
      type: file
      description: Input sequences in FASTA format
      pattern: "*.{fa,fasta}"
  - meta2:
      type: map
      description: |
        Groovy Map containing tree information
        e.g. `[ id:'test_tree']`
  # NOTE(review): main.nf stages this file but does not pass it to
  # run_upp.py - confirm whether the tree is meant to be used.
  - tree:
      type: file
      description: Input guide tree in Newick format
      pattern: "*.{dnd}"
  - compress:
      type: boolean
      description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test']`
  - alignment:
      type: file
      description: Alignment file, in FASTA format. May be gzipped or uncompressed, depending on if compress is set to true or false
      pattern: "*.aln{.gz,}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@luisas"
maintainers:
  - "@luisas"
96 changes: 96 additions & 0 deletions modules/nf-core/upp/align/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// nf-test suite for the UPP_ALIGN process.
// Three cases: uncompressed output without a tree, compressed output with a
// tree produced by FAMSA_GUIDETREE, and a stub run.
nextflow_process {

    name "Test Process UPP_ALIGN"
    script "../main.nf"
    process "UPP_ALIGN"

    tag "modules"
    tag "modules_nfcore"
    tag "upp"
    tag "upp/align"
    // The tree test depends on the famsa/guidetree module in its setup block.
    tag "famsa/guidetree"

    // Case 1: no guide tree (empty meta map and empty path), compress = false.
    test("fasta - align_sequence - uncompressed") {

        config "./nextflow.config"

        when {
            process {
                """
                input[0] = [ [ id:'test' ],
                file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
                ]
                input[1] = [[:],[]]
                input[2] = false
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Case 2: guide tree generated from the same FASTA, compress = true.
    test("fasta - with_tree - compressed") {

        config "./nextflow.config"

        setup {
            // Build the guide tree that is fed to UPP_ALIGN as input[1].
            run("FAMSA_GUIDETREE") {
                script "../../../famsa/guidetree/main.nf"
                process {
                    """
                    input[0] = [ [ id:'tree' ],
                    file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
                    ]
                    """
                }
            }
        }

        when {
            process {
                """
                input[0] = [ [ id:'test_tree' ],
                file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
                ]
                input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]}
                input[2] = true
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match()}
            )
        }
    }

    // Case 3: stub run (-stub) with the same inputs as case 1.
    test("stub") {

        config "./nextflow.config"
        options "-stub"

        when {
            process {
                """
                input[0] = [ [ id:'test' ],
                file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
                ]
                input[1] = [[:],[]]
                input[2] = false
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match()}
            )
        }
    }
}
101 changes: 101 additions & 0 deletions modules/nf-core/upp/align/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
"fasta - align_sequence - uncompressed": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.aln:md5,e6b5291e9cdb40e9b7c72688e4da533b"
]
],
"1": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
],
"alignment": [
[
{
"id": "test"
},
"test.aln:md5,e6b5291e9cdb40e9b7c72688e4da533b"
]
],
"versions": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-17T07:51:30.876772941"
},
"stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.aln:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
],
"alignment": [
[
{
"id": "test"
},
"test.aln:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-17T07:36:19.135281969"
},
"fasta - with_tree - compressed": {
"content": [
{
"0": [
[
{
"id": "test_tree"
},
"test_tree.aln.gz:md5,e6b5291e9cdb40e9b7c72688e4da533b"
]
],
"1": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
],
"alignment": [
[
{
"id": "test_tree"
},
"test_tree.aln.gz:md5,e6b5291e9cdb40e9b7c72688e4da533b"
]
],
"versions": [
"versions.yml:md5,b431bb15ae86dcd4485d921df1752a98"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-17T07:52:05.47226891"
}
}
7 changes: 7 additions & 0 deletions modules/nf-core/upp/align/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Test-only configuration for the UPP_ALIGN nf-test suite.
process {
    withName: 'UPP_ALIGN' {
        // Extra command-line arguments injected into run_upp.py via task.ext.args.
        // NOTE(review): "-m amino" presumably selects the amino-acid molecule
        // type for the protein test data - confirm against the UPP options.
        ext.args = { "-m amino" }
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/upp/align/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the upp/align tag to the paths belonging to this module.
upp/align:
  - 'modules/nf-core/upp/align/**'

0 comments on commit 3be751e

Please sign in to comment.