Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding seqkit concat #4841

Merged
merged 23 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d13a24d
Adding seqkit concat
DLBPointon Feb 2, 2024
7c7756c
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 2, 2024
150e0f9
Prettier
DLBPointon Feb 2, 2024
201a306
Updates
DLBPointon Feb 2, 2024
26c5b66
Merge branch 'dp24_seqkit_concat' of https://github.com/nf-core/modul…
DLBPointon Feb 2, 2024
cf23803
Update format
DLBPointon Feb 2, 2024
8460953
Correct md5sum
DLBPointon Feb 5, 2024
5d607e6
Updated to 2.7.0, further attempts to get it working
DLBPointon Feb 6, 2024
405208e
Corrected tests
DLBPointon Feb 6, 2024
36e22b7
Corrected tests
DLBPointon Feb 6, 2024
444f08f
Corrected tests
DLBPointon Feb 6, 2024
470993b
Update modules/nf-core/seqkit/concat/tests/main.nf.test
DLBPointon Feb 6, 2024
3af47d6
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 6, 2024
1edc702
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 7, 2024
4b2b744
Update modules/nf-core/seqkit/concat/tests/main.nf.test
heuermh Feb 7, 2024
66cad08
Update modules/nf-core/seqkit/concat/tests/main.nf.test
DLBPointon Feb 7, 2024
e43c62a
Update modules/nf-core/seqkit/concat/tests/main.nf.test
DLBPointon Feb 7, 2024
9d2a0d3
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 7, 2024
378224b
Updating Snapshot
DLBPointon Feb 7, 2024
48049d0
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 7, 2024
3ef8991
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 9, 2024
5c27dfb
Update main.nf
DLBPointon Feb 9, 2024
05c1b7f
Merge branch 'master' into dp24_seqkit_concat
DLBPointon Feb 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/seqkit/concat/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the seqkit/concat module.
name: "seqkit_concat"
# Conda channels made available for resolving the dependency below.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Pinned to 2.7.0, the same seqkit version as the container tags used in main.nf.
- "bioconda::seqkit=2.7.0"
47 changes: 47 additions & 0 deletions modules/nf-core/seqkit/concat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// SEQKIT_CONCAT: runs `seqkit concat` over the staged input file(s) and
// redirects the result into a single `${prefix}.<ext>` file, where <ext> is
// the extension of the first (or only) input. The seqkit version is written
// to versions.yml.
process SEQKIT_CONCAT {
tag "$meta.id"
label 'process_low'

conda "${moduleDir}/environment.yml"
// Singularity runs use the Galaxy depot image unless
// ext.singularity_pull_docker_container is set; every other case uses the
// biocontainers image. Both tags are seqkit 2.7.0.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/seqkit:2.7.0--h9ee0642_0':
'biocontainers/seqkit:2.7.0--h9ee0642_0' }"

input:
// meta  : sample map; meta.id is used for the tag and the default prefix.
// input : a single file or a list of files, staged inside the `in/` directory
//         so the command below can address them with the `in/*` glob.
tuple val(meta), path(input, stageAs: 'in/*')

output:
// fastx    : the concatenated file; the glob lists the extensions that are captured.
// versions : versions.yml written at the end of the script.
tuple val(meta), path("*.{fasta,fastq,fa,fq,fas,fna,faa}"), emit: fastx
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Extra command-line options supplied through config (ext.args); empty by default.
def args = task.ext.args ?: ""
// Output basename; falls back to the sample id.
def prefix = task.ext.prefix ?: "${meta.id}"
// Output extension is copied from the first input when a list is given,
// otherwise from the single input file.
def file_type = input instanceof List ? input[0].getExtension() : input.getExtension()
"""
seqkit \\
concat \\
$args \\
in/* > ${prefix}.${file_type}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
seqkit: \$(seqkit version | cut -d' ' -f2)
END_VERSIONS
"""

stub:
// No `args` local here: the stub command never passes extra options to seqkit.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would remove def args in stub if it is not used. Actually I don't know if there is a recommendation, but it saves 1 line of code 🤷

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand, it keeps it a bit more similar to the normal test. So whichever is fine, I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be fair, in my other modules I have only kept prefixes because they are actually used. So removing it is good for consistency at least? Like you said, it is a wasted line.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do the same for my modules ;) So feel free to update that and then ready to merge 🚀

// Output basename; falls back to the sample id.
def prefix = task.ext.prefix ?: "${meta.id}"
// Mirror the script block: reuse the extension of the first (or only) input so
// a stub run emits the same file name a real run would, instead of always `.fasta`.
def file_type = input instanceof List ? input[0].getExtension() : input.getExtension()
"""
touch ${prefix}.${file_type}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
seqkit: \$(seqkit version | cut -d' ' -f2)
END_VERSIONS
"""
}
54 changes: 54 additions & 0 deletions modules/nf-core/seqkit/concat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for seqkit/concat: tool description plus the input and
# output channels declared by the SEQKIT_CONCAT process.
name: "seqkit_concat"
description: Concatenating multiple uncompressed sequence files together
keywords:
  - concat
  - fasta
  - fastq
  - merge
tools:
  - seqkit:
      description: |
        Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      homepage: https://github.com/shenwei356/seqkit
      documentation: https://bioinf.shenwei.me/seqkit/
      tool_dev_url: https://github.com/shenwei356/seqkit
      doi: 10.1371/journal.pone.0163962
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  # The process accepts either a single file or a list of files.
  - input:
      type: file
      description: One or more uncompressed sequence files in FASTA/FASTQ format
      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  - fastx:
      type: file
      description: A concatenated sequence file
      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}"

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@DLBPointon"
maintainers:
  - "@DLBPointon"
66 changes: 66 additions & 0 deletions modules/nf-core/seqkit/concat/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// nf-test suite for SEQKIT_CONCAT.
// Both tests feed the sarscov2 genome FASTA and the human PacBio primers FASTA
// as a two-file list under the meta map [ id:'test' ].
nextflow_process {

    name "Test Process SEQKIT_CONCAT"
    script "../main.nf"
    process "SEQKIT_CONCAT"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "seqkit"
    tag "seqkit/concat"

    test("sarscov2 and human primers - fasta") {
        when {
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    [
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true )
                    ]
                ]
                """
            }
        }

        then {
            // Every check lives inside assertAll so that a single run reports
            // all failing assertions rather than stopping at the first one.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match() },
                // The output must include headers from both input files.
                { assert path(process.out.fastx.get(0).get(1)).readLines().any { it.contains('>NEB_Clontech_3p') } },
                { assert path(process.out.fastx.get(0).get(1)).readLines().any { it.contains('>MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome') } }
            )
        }
    }

    test("sarscov2 and human primers - fasta - stub") {

        options '-stub'

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    [
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true )
                    ]
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match() },
                // The stub must still emit the placeholder output file
                // (prefix defaults to meta.id, inputs are .fasta).
                { assert file(process.out.fastx[0][1]).name == 'test.fasta' }
            )
        }
    }
}
18 changes: 18 additions & 0 deletions modules/nf-core/seqkit/concat/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions modules/nf-core/seqkit/concat/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration for the SEQKIT_CONCAT process.
process {
    withName: 'SEQKIT_CONCAT' {
        // Forwarded to the `seqkit concat` command line through task.ext.args.
        ext.args = '--full'
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/seqkit/concat/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the seqkit/concat tag with every path under the module directory.
seqkit/concat:
- "modules/nf-core/seqkit/concat/**"
Loading