Merge branch 'update_antismash-run' of github.com:jasmezz/modules into update_antismash-run
nf-core · Feb 9, 2024 · f03987c · f03987c
2 parents f6f7177 + 476d215
commit f03987c
Show file tree

Hide file tree

Showing 16 changed files with 1,403 additions and 3 deletions.
diff --git a/modules/nf-core/seqkit/concat/environment.yml b/modules/nf-core/seqkit/concat/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment loaded by the SEQKIT_CONCAT process (see the `conda` directive in main.nf).
+name: "seqkit_concat"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  # Same seqkit release (2.7.0) as the container images referenced in main.nf.
+  - "bioconda::seqkit=2.7.0"
diff --git a/modules/nf-core/seqkit/concat/main.nf b/modules/nf-core/seqkit/concat/main.nf
@@ -0,0 +1,46 @@
+// Runs `seqkit concat` over all staged input files and writes one output file
+// named after the prefix, using the extension of the first input.
+process SEQKIT_CONCAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqkit:2.7.0--h9ee0642_0':
+        'biocontainers/seqkit:2.7.0--h9ee0642_0' }"
+
+    input:
+    // Input files are staged under 'in/'; the script below globs that directory.
+    tuple val(meta), path(input, stageAs: 'in/*')
+
+    output:
+    tuple val(meta), path("*.{fasta,fastq,fa,fq,fas,fna,faa}"), emit: fastx
+    path "versions.yml",                                        emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix      = task.ext.prefix ?: "${meta.id}"
+    def args        = task.ext.args   ?: ""
+    // `input` is either a single path or a list of paths; take the first one
+    // and reuse its extension for the output file.
+    def first_input = input instanceof List ? input[0] : input
+    def out_ext     = first_input.getExtension()
+    """
+    seqkit \\
+        concat \\
+        $args \\
+        in/* > ${prefix}.${out_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(seqkit version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub always creates a '.fasta' placeholder, whatever the input extension.
+    def prefix  = task.ext.prefix   ?: "${meta.id}"
+    """
+    touch ${prefix}.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$(seqkit version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/seqkit/concat/meta.yml b/modules/nf-core/seqkit/concat/meta.yml
@@ -0,0 +1,54 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "seqkit_concat"
+description: Concatenating multiple uncompressed sequence files together
+keywords:
+  - concat
+  - fasta
+  - fastq
+  - merge
+tools:
+  - seqkit:
+      description: |
+        Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
+      homepage: https://github.com/shenwei356/seqkit
+      documentation: https://bioinf.shenwei.me/seqkit/
+      tool_dev_url: https://github.com/shenwei356/seqkit
+      doi: 10.1371/journal.pone.0163962
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - input:
+      type: file
+      description: Sequence files in FASTA/Q format to be concatenated
+      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}"
+
+# Output channels; the keys match the `emit:` names declared in main.nf
+output:
+  # Sample metadata emitted alongside the concatenated file
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - fastx:
+      type: file
+      description: A concatenated sequence file
+      pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}"
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@DLBPointon"
+maintainers:
+  - "@DLBPointon"
diff --git a/modules/nf-core/seqkit/concat/tests/main.nf.test b/modules/nf-core/seqkit/concat/tests/main.nf.test
@@ -0,0 +1,66 @@
+// nf-test suite for the SEQKIT_CONCAT process defined in ../main.nf.
+nextflow_process {
+
+    name "Test Process SEQKIT_CONCAT"
+    script "../main.nf"
+    process "SEQKIT_CONCAT"
+    // The sibling nextflow.config sets ext.args for SEQKIT_CONCAT.
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "seqkit"
+    tag "seqkit/concat"
+
+    // Real run: two FASTA files are passed as one list under a single meta map.
+    test("sarscov2 and human primers - fasta") {
+        when {
+            process {
+                """
+                input[0] = [
+                    [   id:'test'   ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true )
+                    ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            // Only versions.yml is snapshotted; the sequence output is checked
+            // by looking for specific header lines below.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match()}
+            )
+            with(process.out.fastx) {
+                // get(0).get(1) is the file element of the first emitted [meta, file] tuple.
+                // Two header lines are expected in the output, presumably one from each input file.
+                assert path(get(0).get(1)).readLines().any { it.contains('>NEB_Clontech_3p') }
+                assert path(get(0).get(1)).readLines().any { it.contains('>MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome') }
+            }
+        }
+    }
+
+    // Stub run with the same inputs: only success and the versions snapshot are checked.
+    test("sarscov2 and human primers - fasta - stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [   id:'test'   ], // meta map
+                        [
+                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true )
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match()}
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/seqkit/concat/tests/main.nf.test.snap b/modules/nf-core/seqkit/concat/tests/main.nf.test.snap
diff --git a/modules/nf-core/seqkit/concat/tests/nextflow.config b/modules/nf-core/seqkit/concat/tests/nextflow.config
@@ -0,0 +1,5 @@
+// Configuration loaded by tests/main.nf.test via `config "./nextflow.config"`.
+process {
+    withName: SEQKIT_CONCAT {
+        // Read in main.nf as task.ext.args and placed on the `seqkit concat` command line.
+        ext.args    = "--full"
+    }
+}
diff --git a/modules/nf-core/seqkit/concat/tests/tags.yml b/modules/nf-core/seqkit/concat/tests/tags.yml
@@ -0,0 +1,2 @@
+# The "seqkit/concat" tag (also set in tests/main.nf.test) mapped to this module's file glob.
+seqkit/concat:
+  - "modules/nf-core/seqkit/concat/**"
diff --git a/modules/nf-core/tximeta/tximport/environment.yml b/modules/nf-core/tximeta/tximport/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment loaded by the TXIMETA_TXIMPORT process (see the `conda` directive in main.nf).
+name: "tximeta_tximport"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  # Same bioconductor-tximeta release (1.20.1) as the container images referenced in main.nf.
+  - "bioconda::bioconductor-tximeta=1.20.1"
diff --git a/modules/nf-core/tximeta/tximport/main.nf b/modules/nf-core/tximeta/tximport/main.nf
@@ -0,0 +1,48 @@
+// Runs the tximport.r template on a set of quantification results and emits
+// gene- and transcript-level TSV tables plus versions.yml.
+process TXIMETA_TXIMPORT {
+    // Label each task with the meta id, as the sibling SEQKIT_CONCAT module does.
+    tag "$meta.id"
+    label "process_medium"
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bioconductor-tximeta%3A1.20.1--r43hdfd78af_0' :
+        'biocontainers/bioconductor-tximeta:1.20.1--r43hdfd78af_0' }"
+
+    input:
+    // Quantification results are staged under 'quants/'.
+    tuple val(meta), path("quants/*")
+    tuple val(meta2), path(tx2gene)
+    tuple val(meta3), path(coldata)
+    // Described in meta.yml as 'kallisto' or 'salmon'.
+    val quant_type
+
+    output:
+    tuple val(meta), path("*gene_tpm.tsv")                 , emit: tpm_gene
+    tuple val(meta), path("*gene_counts.tsv")              , emit: counts_gene
+    tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled
+    tuple val(meta), path("*gene_counts_scaled.tsv")       , emit: counts_gene_scaled
+    tuple val(meta), path("*gene_lengths.tsv")             , emit: lengths_gene
+    tuple val(meta), path("*transcript_tpm.tsv")           , emit: tpm_transcript
+    tuple val(meta), path("*transcript_counts.tsv")        , emit: counts_transcript
+    tuple val(meta), path("*transcript_lengths.tsv")       , emit: lengths_transcript
+    path "versions.yml"                                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The script body lives in the tximport.r template, which is not part of this file.
+    template 'tximport.r'
+
+    stub:
+    // Creates one empty placeholder per declared output, named from meta.id.
+    // NOTE(review): only the R version is reported here; confirm this matches
+    // the keys the tximport.r template writes to versions.yml.
+    """
+    touch ${meta.id}.gene_tpm.tsv
+    touch ${meta.id}.gene_counts.tsv
+    touch ${meta.id}.gene_counts_length_scaled.tsv
+    touch ${meta.id}.gene_counts_scaled.tsv
+    touch ${meta.id}.gene_lengths.tsv
+    touch ${meta.id}.transcript_tpm.tsv
+    touch ${meta.id}.transcript_counts.tsv
+    touch ${meta.id}.transcript_lengths.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/tximeta/tximport/meta.yml b/modules/nf-core/tximeta/tximport/meta.yml
@@ -0,0 +1,120 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "tximeta_tximport"
+description: |
+  Import transcript-level abundances and estimated counts for gene-level
+  analysis packages
+keywords:
+  - gene
+  - kallisto
+  - pseudoalignment
+  - salmon
+  - transcript
+tools:
+  - "tximeta":
+      description: "Transcript Quantification Import with Automatic Metadata"
+      homepage: "https://bioconductor.org/packages/release/bioc/html/tximeta.html"
+      documentation: "https://bioconductor.org/packages/release/bioc/vignettes/tximeta/inst/doc/tximeta.html"
+      tool_dev_url: "https://github.com/thelovelab/tximeta"
+      doi: "10.1371/journal.pcbi.1007664"
+      licence: ["GPL-2"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing information related to the experiment as a whole
+        e.g. `[ id:'SRP123456' ]`
+  - quants:
+      type: directory
+      description: Paths to subdirectories corresponding to
+        sample-wise runs of Salmon or Kallisto
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information related to the species
+        reference e.g. `[ id:'yeast' ]`
+  - tx2gene:
+      type: file
+      description: A transcript to gene mapping table such as those generated
+        by custom/tx2gene
+      pattern: "*.{csv,tsv}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing information related to the experiment as a whole
+        e.g. `[ id:'SRP123456' ]`
+  - coldata:
+      type: file
+      description: |
+        Optional 'coldata' file equivalent to a sample sheet where the first
+        column corresponds to the sample names (directory names in the input
+        salmon/ kallisto results)
+      pattern: "*.{csv,tsv}"
+  - quant_type:
+      type: string
+      description: Quantification type, 'kallisto' or 'salmon'
+
+# Output keys mirror the `emit:` names declared in main.nf
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing information related to the experiment as a whole
+        e.g. `[ id:'SRP123456' ]`
+  - tpm_gene:
+      type: file
+      description: |
+        Abundance (TPM) values derived from tximport output after
+        summarizeToGene(), without a 'countsFromAbundance' specification
+      pattern: "*gene_tpm.tsv"
+  - counts_gene:
+      type: file
+      description: |
+        Count values derived from tximport output after
+        summarizeToGene(), without a 'countsFromAbundance' specification
+      pattern: "*gene_counts.tsv"
+  - counts_gene_length_scaled:
+      type: file
+      description: |
+        Count values derived from tximport output after summarizeToGene(), with
+        a 'countsFromAbundance' specification of 'lengthScaledTPM'
+      pattern: "*gene_counts_length_scaled.tsv"
+  - counts_gene_scaled:
+      type: file
+      description: |
+        Count values derived from tximport output after summarizeToGene(), with
+        a 'countsFromAbundance' specification of 'scaledTPM'
+      pattern: "*gene_counts_scaled.tsv"
+  - lengths_gene:
+      type: file
+      description: |
+        Length values derived from tximport output after summarizeToGene(),
+        without a 'countsFromAbundance' specification
+      pattern: "*gene_lengths.tsv"
+  - tpm_transcript:
+      type: file
+      description: |
+        Abundance (TPM) values derived from tximport output without
+        summarizeToGene(), without a 'countsFromAbundance' specification
+      pattern: "*transcript_tpm.tsv"
+  - counts_transcript:
+      type: file
+      description: |
+        Count values derived from tximport output without
+        summarizeToGene(), without a 'countsFromAbundance' specification
+      pattern: "*transcript_counts.tsv"
+  - lengths_transcript:
+      type: file
+      description: |
+        Length values derived from tximport output without summarizeToGene(),
+        without a 'countsFromAbundance' specification
+      pattern: "*transcript_lengths.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@pinin4fjords"
+maintainers:
+  - "@pinin4fjords"