Skip to content

Commit

Permalink
Added samshee (#6749)
Browse files Browse the repository at this point in the history
* Added samshee

* Fix pre-commit

* Fix linting

* Add more options

* Remove nextflow.config

* Addressing PR comments

* Add python version

* Add file schema testing and missing arg

* Fixing test

* Fix test

* fix tests conda

* Fix conda env.yml

---------

Co-authored-by: zxBIB Schcolnicov <[email protected]>
  • Loading branch information
nschcolnicov and zxBIB Schcolnicov authored Oct 16, 2024
1 parent eaeecb1 commit 3c464e7
Show file tree
Hide file tree
Showing 6 changed files with 301 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/nf-core/samshee/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the SAMSHEE module.
# Channel order: conda-forge is listed ahead of bioconda.
channels:
- conda-forge
- bioconda
dependencies:
# samshee is taken explicitly from the bioconda channel at a fixed version.
- bioconda::samshee=0.2.1
# The Python interpreter is pinned to an exact patch release.
- python=3.13.0
47 changes: 47 additions & 0 deletions modules/nf-core/samshee/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// SAMSHEE: runs `python -m samshee` on a samplesheet and redirects the command's
// stdout into `<samplesheet baseName>_formatted.csv`. The samshee and Python
// versions are recorded in versions.yml.
process SAMSHEE {
tag "$meta.id"
label 'process_single'

conda "${moduleDir}/environment.yml"
// Image selection: the https blob URL is used when the container engine is
// singularity and task.ext.singularity_pull_docker_container is not set;
// in every other case the community.wave.seqera.io image name is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/65/659cdc3068a6fbce17ccb199bb3afc8600c65940743c1a0214b3bf0eed4df1a3/data' :
'community.wave.seqera.io/library/pip_samshee:9b655e3c18eee356' }"

input:
// meta: map whose `id` entry is used for the task tag; samplesheet: file to process.
tuple val(meta), path(samplesheet)
// Optional schema file: when this evaluates to false (e.g. an empty list is passed)
// no file-based --schema argument is added to the command.
path(file_schema_validator)

output:
tuple val(meta), path("*_formatted.csv"), emit: samplesheet
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Builds `--schema '{"$ref": "file:<staged schema path>"}'`; the `$` is escaped so it
// reaches the shell literally instead of being interpolated by Groovy.
def arg_file_schema_validator = file_schema_validator ? "--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : ""
// Extra command-line arguments supplied through task.ext.args (empty string when unset).
def args = task.ext.args ?: ""
"""
# Run validation command and capture output
python -m samshee $samplesheet \
$args \
$arg_file_schema_validator \
> ${samplesheet.baseName}_formatted.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" )
python: \$( python --version | sed -e "s/Python //g" )
END_VERSIONS
"""

stub:
// Stub run: creates an empty output file with the same name as the real run and
// writes the same versions.yml content, without invoking samshee itself.
"""
touch ${samplesheet.baseName}_formatted.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" )
python: \$( python --version | sed -e "s/Python //g" )
END_VERSIONS
"""
}
47 changes: 47 additions & 0 deletions modules/nf-core/samshee/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Module metadata for SAMSHEE: describes the tool, its inputs and its outputs.
name: samshee
description: Module to validate illumina® Sample Sheet v2 files.
keywords:
  - samplesheet
  - illumina
  - bclconvert
  - bcl2fastq
tools:
  - samshee:
      description: A schema-agnostic parser and writer for illumina® sample sheets v2 and similar documents.
      homepage: https://github.com/lit-regensburg/samshee
      documentation: https://github.com/lit-regensburg/samshee/blob/main/README.md
      tool_dev_url: https://github.com/lit-regensburg/samshee
      licence: [MIT license]
      identifier: ""
input:
  # First input channel: tuple val(meta), path(samplesheet)
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', lane:1 ]
    - samplesheet:
        type: file
        description: "illumina v2 samplesheet"
        pattern: "*.{csv}"
  # Second input channel: declared as `path` in main.nf, so it is a file, not a string.
  - - file_schema_validator:
        type: file
        description: "Optional JSON file used for additional samplesheet validation settings"
output:
  - samplesheet:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', lane:1 ]
      - "*_formatted.csv":
          type: file
          description: "illumina v2 samplesheet"
  - versions:
      - versions.yml:
          type: file
          description: File containing software version
          pattern: "versions.yml"
authors:
  - "@nschcolnicov"
maintainers:
  - "@nschcolnicov"
85 changes: 85 additions & 0 deletions modules/nf-core/samshee/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// nf-test suite for the SAMSHEE process. The params set in each test are read by
// ./nextflow.config, which turns them into the process's ext.args.
nextflow_process {

name "Test Process samshee"
script "../main.nf"
process "SAMSHEE"
config "./nextflow.config"
tag "modules"
tag "modules_nfcore"
tag "samshee"

// Case 1: MiSeq samplesheet, with v1_schema enabled, an inline JSON schema passed via
// params, and an additional schema file passed as the second process input.
test("test samplesheet_v1") {

when {
params {
v1_schema = true
json_schema_validator = '{"required": ["Data"]}'
name_schema_validator = null
}
process {
// The snippet below assigns input[1] before writing schema.json.
// NOTE(review): "schema.json" is a relative path, so the file is presumably created in
// the directory the test is launched from — confirm this is intended and cleaned up.
"""
input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ]
input[1] = file("schema.json")
new File("schema.json").text = '''{
"\$schema": "https://json-schema.org/draft/2020-12/schema",
"required": ["Settings"]
}'''
"""
}
}

then {
assert process.success
assert snapshot(process.out).match()
}

}
// Case 2: NextSeq2000 samplesheet, with a schema referenced by name
// (urn:samshee:illuminav2/v1) and no schema file (input[1] is an empty list).
test("test samplesheet_v2") {

when {
params {
v1_schema = null
json_schema_validator = null
name_schema_validator = '{"$ref": "urn:samshee:illuminav2/v1"}'
}
process {
"""
input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ]
input[1] = []
"""
}
}

then {
assert process.success
assert snapshot(process.out).match()
}

}

// Case 3: same inputs as case 2 but with all params unset and the `-stub` option,
// so the process's stub block is exercised instead of the real script.
test("stub") {

options "-stub"

when {
params {
v1_schema = null
json_schema_validator = null
name_schema_validator = null
}
process {
"""
input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ]
input[1] = []
"""
}
}

then {
assert process.success
assert snapshot(process.out).match()
}

}

}
107 changes: 107 additions & 0 deletions modules/nf-core/samshee/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
{
"stub": {
"content": [
{
"0": [
[
{
"id": "test",
"lane": 1
},
"SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
],
"samplesheet": [
[
{
"id": "test",
"lane": 1
},
"SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-16T15:25:40.722007136"
},
"test samplesheet_v1": {
"content": [
{
"0": [
[
{
"id": "test",
"lane": 1
},
"miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26"
]
],
"1": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
],
"samplesheet": [
[
{
"id": "test",
"lane": 1
},
"miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26"
]
],
"versions": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-16T15:25:02.353128191"
},
"test samplesheet_v2": {
"content": [
{
"0": [
[
{
"id": "test",
"lane": 1
},
"SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697"
]
],
"1": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
],
"samplesheet": [
[
{
"id": "test",
"lane": 1
},
"SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697"
]
],
"versions": [
"versions.yml:md5,77af0194d386117bf52aaabdf350a976"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-16T15:25:24.540910786"
}
}
9 changes: 9 additions & 0 deletions modules/nf-core/samshee/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Test configuration: derives the SAMSHEE process's extra command-line arguments
// from the params set by each nf-test case.
process {
    withName: SAMSHEE {
        // The three optional fragments are always separated by a single space, even
        // when a fragment is empty; only leading/trailing whitespace is trimmed.
        ext.args = (
            (params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : '') + ' ' +
            (params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : '') + ' ' +
            (params.v1_schema ? '--output-format sectioned' : '')
        ).trim()
    }
}

0 comments on commit 3c464e7

Please sign in to comment.