Skip to content

Commit

Permalink
Add tximport module (port from rnaseq) (nf-core#4884)
Browse files Browse the repository at this point in the history
* Add tximport module

* Populate meta

* Fix up tximport

* Appease eclint

* Fix conda

* Fix singularity

* Update test_data.config

* poke ci

* remove rogue empty line

* Update tests/config/test_data.config

Co-authored-by: Maxime U Garcia <[email protected]>

---------

Co-authored-by: Maxime U Garcia <[email protected]>
  • Loading branch information
2 people authored and jennylsmith committed Mar 20, 2024
1 parent 169902c commit 7c31a15
Show file tree
Hide file tree
Showing 8 changed files with 1,201 additions and 1 deletion.
9 changes: 9 additions & 0 deletions modules/nf-core/tximeta/tximport/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the tximeta/tximport module.
name: "tximeta_tximport"
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Exact version pin; main.nf references container images tagged with the
  # same 1.20.1 release, so bump both together.
  - "bioconda::bioconductor-tximeta=1.20.1"
48 changes: 48 additions & 0 deletions modules/nf-core/tximeta/tximport/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Runs the 'tximport.r' template over a set of quantification results and
// publishes gene- and transcript-level TSV tables plus a versions.yml file.
process TXIMETA_TXIMPORT {
    label "process_medium"

    conda "${moduleDir}/environment.yml"
    // Singularity image is used only when the engine is singularity and
    // ext.singularity_pull_docker_container is not set; otherwise the
    // Docker-style image reference is used.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bioconductor-tximeta%3A1.20.1--r43hdfd78af_0' :
        'biocontainers/bioconductor-tximeta:1.20.1--r43hdfd78af_0' }"

    input:
    // All quantification inputs are staged beneath a 'quants/' directory.
    tuple val(meta), path("quants/*")
    tuple val(meta2), path(tx2gene)
    tuple val(meta3), path(coldata)
    val quant_type

    output:
    // Every table is matched by filename suffix and tagged with `meta`.
    tuple val(meta), path("*gene_tpm.tsv")                 , emit: tpm_gene
    tuple val(meta), path("*gene_counts.tsv")              , emit: counts_gene
    tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled
    tuple val(meta), path("*gene_counts_scaled.tsv")       , emit: counts_gene_scaled
    tuple val(meta), path("*gene_lengths.tsv")             , emit: lengths_gene
    tuple val(meta), path("*transcript_tpm.tsv")           , emit: tpm_transcript
    tuple val(meta), path("*transcript_counts.tsv")        , emit: counts_transcript
    tuple val(meta), path("*transcript_lengths.tsv")       , emit: lengths_transcript
    path "versions.yml"                                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // The actual import logic lives in the module's template file.
    template 'tximport.r'

    stub:
    // Creates an empty placeholder for each declared output, named after
    // meta.id, and records the R version in versions.yml.
    """
    touch ${meta.id}.gene_tpm.tsv
    touch ${meta.id}.gene_counts.tsv
    touch ${meta.id}.gene_counts_length_scaled.tsv
    touch ${meta.id}.gene_counts_scaled.tsv
    touch ${meta.id}.gene_lengths.tsv
    touch ${meta.id}.transcript_tpm.tsv
    touch ${meta.id}.transcript_counts.tsv
    touch ${meta.id}.transcript_lengths.tsv
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
    END_VERSIONS
    """
}
120 changes: 120 additions & 0 deletions modules/nf-core/tximeta/tximport/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Descriptive metadata for the tximeta/tximport module.
name: tximeta_tximport
description: |
  Import transcript-level abundances and estimated counts for gene-level
  analysis packages
keywords:
  - gene
  - kallisto
  - pseudoalignment
  - salmon
  - transcript
# Software wrapped by this module.
tools:
  - tximeta:
      description: "Transcript Quantification Import with Automatic Metadata"
      homepage: "https://bioconductor.org/packages/release/bioc/html/tximeta.html"
      documentation: "https://bioconductor.org/packages/release/bioc/vignettes/tximeta/inst/doc/tximeta.html"
      tool_dev_url: "https://github.com/thelovelab/tximeta"
      doi: "10.1371/journal.pcbi.1007664"
      licence: ["GPL-2"]

# Entries are listed in the same order as the `input:` declarations of the
# process in main.nf.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing information related to the experiment as a whole
        e.g. `[ id:'SRP123456' ]`
  - quants:
      type: directory
      description: >-
        Paths to subdirectories corresponding to
        sample-wise runs of Salmon or Kallisto
  - meta2:
      type: map
      description: |
        Groovy Map containing reference information related to the species
        reference e.g. `[ id:'yeast' ]`
  - tx2gene:
      type: file
      description: >-
        A transcript to gene mapping table such as those generated
        by custom/tx2gene
      pattern: "*.{csv,tsv}"
  - meta3:
      type: map
      description: |
        Groovy Map containing information related to the experiment as a whole
        e.g. `[ id:'SRP123456' ]`
  - coldata:
      type: file
      description: |
        Optional 'coldata' file equivalent to a sample sheet where the first
        column corresponds to the sample names (directory names in the input
        salmon/ kallisto results)
      pattern: "*.{csv,tsv}"
  - quant_type:
      type: string
      description: Quantification type, 'kallisto' or 'salmon'

# Entry names mirror the `emit:` names declared in main.nf, and each pattern
# mirrors the glob used by the corresponding output declaration there.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing information related to the experiment as a whole
        e.g. `[ id:'SRP123456' ]`
  - tpm_gene:
      type: file
      description: |
        Abundance (TPM) values derived from tximport output after
        summarizeToGene(), without a 'countsFromAbundance' specification
      pattern: "*gene_tpm.tsv"
  - counts_gene:
      type: file
      description: |
        Count values derived from tximport output after
        summarizeToGene(), without a 'countsFromAbundance' specification
      pattern: "*gene_counts.tsv"
  - counts_gene_length_scaled:
      type: file
      description: |
        Count values derived from tximport output after summarizeToGene(), with
        a 'countsFromAbundance' specification of 'lengthScaledTPM'
      pattern: "*gene_counts_length_scaled.tsv"
  - counts_gene_scaled:
      type: file
      description: |
        Count values derived from tximport output after summarizeToGene(), with
        a 'countsFromAbundance' specification of 'scaledTPM'
      pattern: "*gene_counts_scaled.tsv"
  # Named `lengths_gene` (not `gene_lengths`) to match the process emit name.
  - lengths_gene:
      type: file
      description: |
        Length values derived from tximport output after summarizeToGene(),
        without a 'countsFromAbundance' specification
      pattern: "*gene_lengths.tsv"
  - tpm_transcript:
      type: file
      description: |
        Abundance (TPM) values derived from tximport output without
        summarizeToGene(), without a 'countsFromAbundance' specification
      pattern: "*transcript_tpm.tsv"
  - counts_transcript:
      type: file
      description: |
        Count values derived from tximport output without
        summarizeToGene(), without a 'countsFromAbundance' specification
      pattern: "*transcript_counts.tsv"
  # Named `lengths_transcript` to match the process emit name; the pattern
  # targets the transcript-level file rather than the gene-level one.
  - lengths_transcript:
      type: file
      description: |
        Length values derived from tximport output without summarizeToGene(),
        without a 'countsFromAbundance' specification
      pattern: "*transcript_lengths.tsv"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

# GitHub handles stay quoted because a plain scalar cannot begin with '@'.
authors:
  - "@pinin4fjords"
maintainers:
  - "@pinin4fjords"
Loading

0 comments on commit 7c31a15

Please sign in to comment.