Purecn/run (nf-core#3140)

* Set up nf-core template for purecn/run module * Add run script and I/O * Clean TODOs and set main script * Fix duplicate entry * Set up main script * [ci skip] Format with prettier * [ci skip] Address review question and set up stub * [ci skip] Set stub for testing * Reformat with prettier * [CI skip] Adjust some typos * [CI skip] Set up test script using stub * Set up test yml * Reformat with prettier * Set up meta.yml file and fix typos * Reformat with prettier * Fix typo * Remove quay.io from container string * Remove optional input argument * Add optional outputs and clean non-mandatory parameters * Clean non-mandatory outputs * Reformat outputs * Fix minor and address reviews
limrp · Jul 28, 2023 · 8f61632 · 8f61632
1 parent 4fcf729
commit 8f61632
Show file tree

Hide file tree

Showing 6 changed files with 232 additions and 0 deletions.
diff --git a/modules/nf-core/purecn/run/main.nf b/modules/nf-core/purecn/run/main.nf
@@ -0,0 +1,73 @@
+// Runs the PureCN.R command-line script on one tumor coverage file, using the
+// supplied interval file, normal database and genome build, and collects the
+// plots, segmentation and (optional) call tables written to the task directory.
+//
+// Inputs:
+//   meta, intervals, coverage - sample map plus the interval and coverage files
+//   normal_db                 - file passed to PureCN.R via --normaldb
+//   genome                    - value passed to PureCN.R via --genome
+// Outputs: see the emit names below; only pdf, local_optima_pdf, seg and
+// versions are mandatory, everything else is declared optional.
+process PURECN_RUN {
+    tag "$meta.id"
+    label 'process_medium'
+
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    conda "bioconda::bioconductor-purecn=2.4.0 bioconda::bioconductor-txdb.hsapiens.ucsc.hg38.knowngene=3.16.0 bioconda::bioconductor-txdb.hsapiens.ucsc.hg19.knowngene=3.2.2 bioconda::bioconductor-org.hs.eg.db=3.16.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-582ac26068889091d5e798347c637f8208d77a71:a29c64a63498b1ee8b192521fdf6ed3c65506994-0':
+        'biocontainers/mulled-v2-582ac26068889091d5e798347c637f8208d77a71:a29c64a63498b1ee8b192521fdf6ed3c65506994-0' }"
+
+    input:
+    tuple val(meta), path(intervals), path(coverage)
+    path normal_db
+    val genome
+
+    output:
+    // NOTE: "*.pdf" is a superset of the more specific *_local_optima.pdf,
+    // *_chromosomes.pdf and *_segmentation.pdf patterns declared below.
+    tuple val(meta), path("*.pdf")                             , emit: pdf
+    tuple val(meta), path("*_local_optima.pdf")                , emit: local_optima_pdf
+    tuple val(meta), path("*_dnacopy.seg")                     , emit: seg
+    tuple val(meta), path("*_genes.csv")                       , emit: genes_csv                   , optional: true
+    tuple val(meta), path("*_amplification_pvalues.csv")       , emit: amplification_pvalues_csv   , optional: true
+    tuple val(meta), path("*.vcf.gz")                          , emit: vcf_gz                      , optional: true
+    tuple val(meta), path("*_variants.csv")                    , emit: variants_csv                , optional: true
+    tuple val(meta), path("*_loh.csv")                         , emit: loh_csv                     , optional: true
+    tuple val(meta), path("*_chromosomes.pdf")                 , emit: chr_pdf                     , optional: true
+    tuple val(meta), path("*_segmentation.pdf")                , emit: segmentation_pdf            , optional: true
+    tuple val(meta), path("*_multisample.seg")                 , emit: multisample_seg             , optional: true
+    path "versions.yml"                                        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '2.4.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // Ask R for the installed location of PureCN.R instead of joining onto the
+    // raw .libPaths() output: .libPaths() can list several directories (one per
+    // line), which would yield an invalid path. mustWork = TRUE makes R fail
+    // loudly if the script cannot be found.
+    // NOTE(review): --stats-file points at ${prefix}_stats.txt, which nothing in
+    // this process creates or declares as an output - confirm what PureCN.R
+    // expects for this option.
+    """
+    purecn_script=\$(Rscript -e 'cat(system.file("extdata", "PureCN.R", package = "PureCN", mustWork = TRUE))')
+    Rscript "\$purecn_script" \\
+        --out ./ \\
+        --tumor ${coverage} \\
+        --sampleid ${prefix} \\
+        --normaldb ${normal_db} \\
+        --intervals ${intervals} \\
+        --genome ${genome} \\
+        --parallel \\
+        --cores ${task.cpus} \\
+        --stats-file ${prefix}_stats.txt \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        purecn: ${VERSION}
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '2.4.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // Create empty placeholders for the three mandatory outputs only.
+    """
+    touch ${prefix}.pdf
+    touch ${prefix}_local_optima.pdf
+    touch ${prefix}_dnacopy.seg
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        purecn: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/purecn/run/meta.yml b/modules/nf-core/purecn/run/meta.yml
@@ -0,0 +1,105 @@
+# Metadata for the purecn/run module: identity, search keywords and the
+# upstream tool it wraps.
+name: "purecn_run"
+description: Run PureCN workflow to normalize, segment and determine purity and ploidy
+keywords:
+  - copy number alteration calling
+  - hybrid capture sequencing
+  - targeted sequencing
+  - DNA sequencing
+tools:
+  - "purecn":
+      description: "Copy number calling and SNV classification using targeted short read sequencing"
+      homepage: "https://bioconductor.org/packages/release/bioc/html/PureCN.html"
+      documentation: "https://bioconductor.org/packages/release/bioc/html/PureCN.html"
+      tool_dev_url: "https://github.com/lima1/PureCN"
+      doi: "10.1186/s13029-016-0060-z"
+      licence: "Artistic-2.0"
+      # Name of the script variable that carries extra command-line arguments.
+      args_id: "$args"
+
+# Entry names match the input declarations of the PURECN_RUN process
+# (meta, intervals, coverage, normal_db, genome).
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - intervals:
+      type: file
+      description: |
+        BED file of target intervals, generated from IntervalFile.R
+      pattern: "{*.bed,*.txt}"
+  - coverage:
+      type: file
+      description: Coverage file generated from Coverage.R
+      pattern: "*.txt"
+  - normal_db:
+      type: file
+      description: |
+        Normal panel in RDS format, generated from NormalDB.R
+      pattern: "*.rds"
+  - genome:
+      type: string
+      description: Genome build
+
+# Entry names match the emit names of the PURECN_RUN process outputs.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - pdf:
+      type: file
+      description: |
+        PDF file containing copy number plots
+      pattern: "*.pdf"
+  - local_optima_pdf:
+      type: file
+      description: |
+        PDF file containing local optima plots
+      pattern: "*_local_optima.pdf"
+  - seg:
+      type: file
+      description: |
+        Tab-delimited file containing segmentation results
+      pattern: "*_dnacopy.seg"
+  - genes_csv:
+      type: file
+      description: |
+        CSV file containing gene copy number calls. Optional
+      pattern: "*_genes.csv"
+  - amplification_pvalues_csv:
+      type: file
+      description: |
+        CSV file containing amplification p-values. Optional
+      pattern: "*_amplification_pvalues.csv"
+  - vcf_gz:
+      type: file
+      description: |
+        GZipped VCF file containing SNV calls. Optional
+      pattern: "*.vcf.gz"
+  - variants_csv:
+      type: file
+      description: |
+        CSV file containing SNV calls. Optional
+      pattern: "*_variants.csv"
+  - loh_csv:
+      type: file
+      description: |
+        CSV file containing LOH calls. Optional
+      pattern: "*_loh.csv"
+  - chr_pdf:
+      type: file
+      description: |
+        PDF file containing chromosome plots. Optional
+      pattern: "*_chromosomes.pdf"
+  - segmentation_pdf:
+      type: file
+      description: |
+        PDF file containing segmentation plots. Optional
+      pattern: "*_segmentation.pdf"
+  - multisample_seg:
+      type: file
+      description: |
+        Tab-delimited file containing segmentation data from multiple samples. Optional
+      pattern: "*_multisample.seg"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+# Module authors, listed by @-handle.
+authors:
+  - "@aldosr"
+  - "@lbeltrame"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
@@ -2907,6 +2907,10 @@ purecn/normaldb:
   - modules/nf-core/purecn/normaldb/**
   - tests/modules/nf-core/purecn/normaldb/**
 
+# purecn/run: the module sources plus the matching test directory.
+purecn/run:
+  - modules/nf-core/purecn/run/**
+  - tests/modules/nf-core/purecn/run/**
+
 purgedups/calcuts:
   - modules/nf-core/purgedups/calcuts/**
   - tests/modules/nf-core/purgedups/calcuts/**

diff --git a/tests/modules/nf-core/purecn/run/main.nf b/tests/modules/nf-core/purecn/run/main.nf
@@ -0,0 +1,35 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PURECN_RUN } from '../../../../../modules/nf-core/purecn/run/main.nf'
+
+// Helper process that fabricates empty placeholder inputs for PURECN_RUN.
+// It defines only a `stub:` block; the test is launched with -stub-run.
+// Each output names its file explicitly so that `intervals` and `coverage`
+// do not both capture every *.txt file in the task directory.
+process STUB_PURECN_RUN {
+    output:
+    path("interval_file.txt")    , emit: intervals
+    path("coverage.txt")         , emit: coverage
+    path("normal_db.rds")        , emit: normal_db
+
+    stub:
+    """
+    touch interval_file.txt
+    touch coverage.txt
+    touch normal_db.rds
+    """
+}
+
+// Stub-mode smoke test: feeds the placeholder files produced by
+// STUB_PURECN_RUN into PURECN_RUN together with a fixed genome build.
+workflow test_purecn_run {
+
+    STUB_PURECN_RUN()
+
+    // Assemble the [ meta, intervals, coverage ] tuple from the files the stub
+    // process actually created, rather than from paths that do not exist on disk.
+    input = STUB_PURECN_RUN.out.intervals
+        .combine(STUB_PURECN_RUN.out.coverage)
+        .map { intervals, coverage -> [ [ id:'test' ], intervals, coverage ] }
+
+    normal_db = STUB_PURECN_RUN.out.normal_db
+    genome = "hg38"
+
+    PURECN_RUN ( input, normal_db, genome )
+}
diff --git a/tests/modules/nf-core/purecn/run/nextflow.config b/tests/modules/nf-core/purecn/run/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    // Publish each process's outputs under <outdir>/<tool>, where <tool> is the
+    // lower-cased text before the first underscore of the unqualified process name.
+    publishDir = {
+        def simpleName = task.process.tokenize(':')[-1]
+        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
+        "${params.outdir}/${toolDir}"
+    }
+
+}
diff --git a/tests/modules/nf-core/purecn/run/test.yml b/tests/modules/nf-core/purecn/run/test.yml
@@ -0,0 +1,10 @@
+# Stub-mode test case (-stub-run): the listed files are the ones the PURECN_RUN
+# stub block touches, plus versions.yml; no checksums are recorded for them.
+- name: purecn run
+  command: nextflow run ./tests/modules/nf-core/purecn/run -entry test_purecn_run -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/purecn/run/nextflow.config -stub-run
+  tags:
+    - purecn
+    - purecn/run
+  files:
+    - path: output/purecn/test.pdf
+    - path: output/purecn/test_dnacopy.seg
+    - path: output/purecn/test_local_optima.pdf
+    - path: output/purecn/versions.yml