Skip to content

Commit

Permalink
Purecn/run (nf-core#3140)
Browse files Browse the repository at this point in the history
* Set up nf-core template for purecn/run module

* Add run script and I/O

* Clean TODOs and set main script

* Fix duplicate entry

* Set up main script

* [ci skip] Format with prettier

* [ci skip] Address review question and set up stub

* [ci skip] Set stub for testing

* Reformat with prettier

* [CI skip] Adjust some typos

* [CI skip] Set up test script using stub

* Set up test yml

* Reformat with prettier

* Set up meta.yml file and fix typos

* Reformat with prettier

* Fix typo

* Remove quay.io from container string

* Remove optional input argument

* Add optional outputs and clean non-mandatory parameters

* Clean non-mandatory outputs

* Reformat outputs

* Fix minor and address reviews
  • Loading branch information
aldosr authored and limrp committed Jul 28, 2023
1 parent 4fcf729 commit 8f61632
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 0 deletions.
73 changes: 73 additions & 0 deletions modules/nf-core/purecn/run/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Runs the PureCN.R command-line script that ships inside the PureCN R package
// on one tumor coverage file, using the given interval file, normal database
// and genome build. Results are written to the current directory with
// `prefix` (task.ext.prefix, defaulting to meta.id) as the sample id.
process PURECN_RUN {
    tag "$meta.id"
    label 'process_medium'

    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda "bioconda::bioconductor-purecn=2.4.0 bioconda::bioconductor-txdb.hsapiens.ucsc.hg38.knowngene=3.16.0 bioconda::bioconductor-txdb.hsapiens.ucsc.hg19.knowngene=3.2.2 bioconda::bioconductor-org.hs.eg.db=3.16.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-582ac26068889091d5e798347c637f8208d77a71:a29c64a63498b1ee8b192521fdf6ed3c65506994-0':
        'biocontainers/mulled-v2-582ac26068889091d5e798347c637f8208d77a71:a29c64a63498b1ee8b192521fdf6ed3c65506994-0' }"

    input:
    tuple val(meta), path(intervals), path(coverage)
    path normal_db
    val genome

    output:
    // NOTE(review): "*.pdf" also matches the more specific *_local_optima.pdf,
    // *_chromosomes.pdf and *_segmentation.pdf files emitted below.
    tuple val(meta), path("*.pdf")                             , emit: pdf
    tuple val(meta), path("*_local_optima.pdf")                , emit: local_optima_pdf
    tuple val(meta), path("*_dnacopy.seg")                     , emit: seg
    tuple val(meta), path("*_genes.csv")                       , emit: genes_csv                 , optional: true
    tuple val(meta), path("*_amplification_pvalues.csv")       , emit: amplification_pvalues_csv , optional: true
    tuple val(meta), path("*.vcf.gz")                          , emit: vcf_gz                    , optional: true
    tuple val(meta), path("*_variants.csv")                    , emit: variants_csv              , optional: true
    tuple val(meta), path("*_loh.csv")                         , emit: loh_csv                   , optional: true
    tuple val(meta), path("*_chromosomes.pdf")                 , emit: chr_pdf                   , optional: true
    tuple val(meta), path("*_segmentation.pdf")                , emit: segmentation_pdf          , optional: true
    tuple val(meta), path("*_multisample.seg")                 , emit: multisample_seg           , optional: true
    path "versions.yml"                                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.4.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // The script path is resolved with system.file() so that it is a single,
    // valid path even when R is configured with more than one library
    // directory (.libPaths() would print one line per library).
    """
    purecn_script=\$(Rscript -e 'cat(system.file("extdata", "PureCN.R", package = "PureCN"))')
    Rscript "\$purecn_script" \\
        --out ./ \\
        --tumor ${coverage} \\
        --sampleid ${prefix} \\
        --normaldb ${normal_db} \\
        --intervals ${intervals} \\
        --genome ${genome} \\
        --parallel \\
        --cores ${task.cpus} \\
        --stats-file ${prefix}_stats.txt \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        purecn: ${VERSION}
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.4.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // Creates empty placeholders for the three mandatory outputs only.
    """
    touch ${prefix}.pdf
    touch ${prefix}_local_optima.pdf
    touch ${prefix}_dnacopy.seg

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        purecn: ${VERSION}
    END_VERSIONS
    """
}
105 changes: 105 additions & 0 deletions modules/nf-core/purecn/run/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Metadata for the PURECN_RUN module.
# Input and output entry names mirror the channel names declared in main.nf.
name: "purecn_run"
description: Run PureCN workflow to normalize, segment and determine purity and ploidy
keywords:
  - copy number alteration calling
  - hybrid capture sequencing
  - targeted sequencing
  - DNA sequencing
tools:
  - "purecn":
      description: "Copy number calling and SNV classification using targeted short read sequencing"
      homepage: "https://bioconductor.org/packages/release/bioc/html/PureCN.html"
      documentation: "https://bioconductor.org/packages/release/bioc/html/PureCN.html"
      tool_dev_url: "https://github.com/lima1/PureCN"
      doi: "10.1186/s13029-016-0060-z"
      licence: "Artistic-2.0"
      args_id: "$args"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - intervals:
      type: file
      description: |
        BED file of target intervals, generated from IntervalFile.R
      pattern: "{*.bed,*.txt}"
  - coverage:
      type: file
      description: Coverage file generated from Coverage.R
      pattern: "*.txt"
  - normal_db:
      type: file
      description: |
        Normal panel in RDS format, generated from NormalDB.R
      pattern: "*.rds"
  - genome:
      type: string
      description: Genome build

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - pdf:
      type: file
      description: |
        PDF file containing copy number plots
      pattern: "*.pdf"
  - local_optima_pdf:
      type: file
      description: |
        PDF file containing local optima plots
      pattern: "*_local_optima.pdf"
  - seg:
      type: file
      description: |
        Tab-delimited file containing segmentation results
      pattern: "*_dnacopy.seg"
  - genes_csv:
      type: file
      description: |
        CSV file containing gene copy number calls. Optional
      pattern: "*_genes.csv"
  - amplification_pvalues_csv:
      type: file
      description: |
        CSV file containing amplification p-values. Optional
      pattern: "*_amplification_pvalues.csv"
  - vcf_gz:
      type: file
      description: |
        GZipped VCF file containing SNV calls. Optional
      pattern: "*.vcf.gz"
  - variants_csv:
      type: file
      description: |
        CSV file containing SNV calls. Optional
      pattern: "*_variants.csv"
  - loh_csv:
      type: file
      description: |
        CSV file containing LOH calls. Optional
      pattern: "*_loh.csv"
  - chr_pdf:
      type: file
      description: |
        PDF file containing chromosome plots. Optional
      pattern: "*_chromosomes.pdf"
  - segmentation_pdf:
      type: file
      description: |
        PDF file containing segmentation plots. Optional
      pattern: "*_segmentation.pdf"
  - multisample_seg:
      type: file
      description: |
        Tab-delimited file containing segmentation data from multiple samples. Optional
      pattern: "*_multisample.seg"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@aldosr"
  - "@lbeltrame"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2907,6 +2907,10 @@ purecn/normaldb:
- modules/nf-core/purecn/normaldb/**
- tests/modules/nf-core/purecn/normaldb/**

# purecn/run tag: file globs covering the module sources and their tests
purecn/run:
- modules/nf-core/purecn/run/**
- tests/modules/nf-core/purecn/run/**

purgedups/calcuts:
- modules/nf-core/purgedups/calcuts/**
- tests/modules/nf-core/purgedups/calcuts/**
Expand Down
35 changes: 35 additions & 0 deletions tests/modules/nf-core/purecn/run/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { PURECN_RUN } from '../../../../../modules/nf-core/purecn/run/main.nf'

// Helper process that creates empty placeholder inputs for PURECN_RUN.
// It defines only a stub section; the test invokes it with -stub-run.
process STUB_PURECN_RUN {
    output:
    // Each output names its file explicitly so the two .txt files are not
    // both captured by the same glob.
    path("interval_file.txt") , emit: intervals
    path("coverage.txt")      , emit: coverage
    path("normal_db.rds")     , emit: normal_db

    stub:
    """
    touch interval_file.txt
    touch coverage.txt
    touch normal_db.rds
    """
}

// Feeds the placeholder files produced by STUB_PURECN_RUN into PURECN_RUN.
workflow test_purecn_run {

    STUB_PURECN_RUN()

    // Build the [ meta, intervals, coverage ] tuple from the files the stub
    // process actually created, rather than from paths relative to the
    // launch directory.
    input = STUB_PURECN_RUN.out.intervals
        .combine(STUB_PURECN_RUN.out.coverage)
        .map { intervals, coverage -> [ [ id:'test' ], intervals, coverage ] }

    normal_db = STUB_PURECN_RUN.out.normal_db
    genome    = "hg38"

    PURECN_RUN ( input, normal_db, genome )
}
5 changes: 5 additions & 0 deletions tests/modules/nf-core/purecn/run/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is the
    // lower-cased text before the first underscore of the simple process name
    // (the last ':'-separated component of task.process).
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

}
10 changes: 10 additions & 0 deletions tests/modules/nf-core/purecn/run/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Stub-run test case for the purecn/run module; `files` lists the paths
# expected under output/purecn.
- name: purecn run
  command: >-
    nextflow run ./tests/modules/nf-core/purecn/run
    -entry test_purecn_run
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/purecn/run/nextflow.config
    -stub-run
  tags:
    - purecn
    - purecn/run
  files:
    - path: output/purecn/test.pdf
    - path: output/purecn/test_dnacopy.seg
    - path: output/purecn/test_local_optima.pdf
    - path: output/purecn/versions.yml

0 comments on commit 8f61632

Please sign in to comment.