Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/purecn/run' into purecn/run
Browse files Browse the repository at this point in the history
  • Loading branch information
aldosr committed Jul 6, 2023
2 parents 205abd4 + 08e7352 commit 7665ef3
Show file tree
Hide file tree
Showing 12 changed files with 251 additions and 6 deletions.
4 changes: 3 additions & 1 deletion modules/nf-core/metaphlan/metaphlan/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ process METAPHLAN_METAPHLAN {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_type = "$input" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "$input" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "$input".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam"
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"

"""
BT2_DB=`find -L "${metaphlan_db_latest}" -name "*rev.1.bt2l" -exec dirname {} \\;`
BT2_DB_INDEX=`find -L ${metaphlan_db_latest} -name "*.rev.1.bt2l" | sed 's/\\.rev.1.bt2l\$//' | sed 's/.*\\///'`
metaphlan \\
--nproc $task.cpus \\
Expand All @@ -37,6 +38,7 @@ process METAPHLAN_METAPHLAN {
$args \\
$bowtie2_out \\
--bowtie2db \$BT2_DB \\
--index \$BT2_DB_INDEX \\
--biom ${prefix}.biom \\
--output_file ${prefix}_profile.txt
Expand Down
59 changes: 59 additions & 0 deletions modules/nf-core/peka/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Runs `peka` on a peaks file and a crosslinks file against a reference
// (fasta + fai + gtf) and collects the k-mer distribution tables and PDFs
// that the tool writes into the task directory.
process PEKA {
    tag "$meta.id"
    label 'process_low'

    conda "bioconda::peka=1.0.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/peka:1.0.0--pyhdfd78af_0':
        'biocontainers/peka:1.0.0--pyhdfd78af_0' }"

    input:
    // NOTE(review): both tuples bind the same name `meta`; presumably the two
    // channels are expected to carry the same sample map -- confirm with callers.
    tuple val(meta), path(peaks)
    tuple val(meta), path(crosslinks)
    path fasta
    path fai
    path gtf

    output:
    // All result channels are optional: a missing file does not fail the task.
    tuple val(meta), path("*mer_cluster_distribution*"), emit: cluster, optional: true
    tuple val(meta), path("*mer_distribution*")        , emit: distribution, optional: true
    tuple val(meta), path("*.pdf")                     , emit: pdf, optional: true
    path "versions.yml"                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def VERSION = '1.0.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // `\\` emits a literal backslash into the rendered shell script so the
    // command keeps its one-option-per-line layout; a single backslash before a
    // newline would be consumed by Groovy and collapse the command onto one line.
    """
    # If the modification date and time of the fai is before the fasta then
    # there will be an error. Touching the file first avoids that.
    touch $fai
    mkdir -p tmp
    TMPDIR="\$(pwd)/tmp" peka \\
        -i $peaks \\
        -x $crosslinks \\
        -g $fasta \\
        -gi $fai \\
        -r $gtf \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        peka: $VERSION
    END_VERSIONS
    """

    stub:
    // Creates empty placeholder outputs matching each output glob above.
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    touch ${prefix}_4mer_cluster_distribution_genome.tsv
    touch ${prefix}_4mer_distribution_genome.tsv
    touch ${prefix}_4mer_genome.pdf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        peka: $VERSION
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/nf-core/peka/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
# Module metadata for PEKA: describes the tool, the inputs the process
# declares and the output channels it emits.
name: "peka"
description: Runs PEKA CLIP peak k-mer analysis
keywords:
  - motif
  - CLIP
  - iCLIP
  - genomics
  - k-mer
tools:
  - "peka":
      description: "Positionally-enriched k-mer analysis (PEKA) is a software package for identifying enriched protein-RNA binding motifs from CLIP datasets"
      homepage: "https://github.com/ulelab/peka"
      documentation: "https://github.com/ulelab/peka"
      tool_dev_url: "https://github.com/ulelab/peka"
      doi: "10.1186/s13059-022-02755-2"
      # A real YAML list rather than a quoted string that only looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - peaks:
      type: file
      description: BED file of peak regions
      pattern: "*.{bed,bed.gz}"
  - crosslinks:
      type: file
      description: BED file of crosslinks
      pattern: "*.{bed,bed.gz}"
  - fasta:
      type: file
      description: Genome reference sequence used
      pattern: "*.{fa,fasta}"
  - fai:
      type: file
      description: FAI file corresponding to the reference sequence
      pattern: "*.{fai}"
  - gtf:
      type: file
      description: A segmented GTF used to annotate peaks
      pattern: "*.{gtf}"

output:
  - cluster:
      type: file
      description: TSV file of summed occurrence distributions of k-mers within defined clusters
      pattern: "*.tsv"
  - distribution:
      type: file
      description: TSV file with calculated PEKA score and occurrence distribution for all possible k-mers
      pattern: "*.tsv"
  - pdf:
      type: file
      description: PDF file with graphs showing k-mer occurrence distributions around thresholded crosslink sites
      pattern: "*.pdf"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@kkuret"
  - "@codeprimate123"
  - "@chris-cheshire"
  - "@charlotteanne"
4 changes: 3 additions & 1 deletion modules/nf-core/salmon/quant/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ process SALMON_QUANT {
prefix = task.ext.prefix ?: "${meta.id}"

def reference = "--index $index"
def input_reads = meta.single_end ? "-r $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
def reads1 = [], reads2 = []
meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}"
if (alignment_mode) {
reference = "-t $transcript_fasta"
input_reads = "-a $reads"
Expand Down
5 changes: 3 additions & 2 deletions modules/nf-core/salmon/quant/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ input:
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
List of input FastQ files for single-end or paired-end data.
Multiple single-end fastqs or pairs of paired-end fastqs are
handled.
- index:
type: directory
description: Folder containing the salmon index files
Expand Down
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2651,6 +2651,10 @@ peddy:
- modules/nf-core/peddy/**
- tests/modules/nf-core/peddy/**

peka:
- modules/nf-core/peka/**
- tests/modules/nf-core/peka/**

phantompeakqualtools:
- modules/nf-core/phantompeakqualtools/**
- tests/modules/nf-core/phantompeakqualtools/**
Expand Down
2 changes: 1 addition & 1 deletion tests/modules/nf-core/metaphlan/metaphlan/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ workflow test_metaphlan_sam {
db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan4_database.tar.gz', checkIfExists: true) ]

UNTAR ( db )
SAMTOOLS_VIEW ( input, [] ,[])
SAMTOOLS_VIEW ( input, [[],[]], [])
METAPHLAN_METAPHLAN ( SAMTOOLS_VIEW.out.sam, UNTAR.out.untar.map{ it[1] } )
}

22 changes: 22 additions & 0 deletions tests/modules/nf-core/peka/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { PEKA } from '../../../../modules/nf-core/peka/main.nf'

// Runs PEKA once on the chr21 HepG2-PCBP1 test data set.
workflow test_peka {

    // Reference files: genome sequence, its index, and the regions GTF.
    genome_fasta  = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21.GRCh38.p12.genome.masked.fa", checkIfExists: true)
    genome_fai    = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21.GRCh38.p12.genome.masked.fa.fai", checkIfExists: true)
    regions_gtf   = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_gencode_regions.gtf", checkIfExists: true)

    // Per-sample inputs, each paired with the same meta map.
    peaks_in      = [ [ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_HepG2-PCBP1-merged.xl10_200_density2_peaks.bed", checkIfExists: true) ]
    crosslinks_in = [ [ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_HepG2-PCBP1-merged.xl.bed", checkIfExists: true) ]

    // Argument order follows the process input declaration:
    // peaks, crosslinks, fasta, fai, gtf.
    PEKA ( peaks_in, crosslinks_in, genome_fasta, genome_fai, regions_gtf )
}
6 changes: 6 additions & 0 deletions tests/modules/nf-core/peka/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Test configuration for the PEKA module.
process {

    // Publish into <outdir>/<name>, where <name> is the last ':'-separated
    // segment of the process name, cut at its first '_' and lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Extra command-line options handed to the PEKA process via task.ext.args.
    // NOTE(review): `-k 4` presumably selects 4-mers, matching the `_4mer_`
    // file names expected by the test -- confirm against the peka CLI help.
    withName: 'PEKA' {
        ext.args = { "-sr 'genome' -re 'unmasked' -k 4 -p 0" }
    }
}
9 changes: 9 additions & 0 deletions tests/modules/nf-core/peka/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Test case for the PEKA module: runs the `test_peka` entry workflow with the
# shared test config plus the module-specific config, then lists the files
# expected under output/peka. No checksums are recorded for these files.
- name: peka test_peka
command: nextflow run ./tests/modules/nf-core/peka -entry test_peka -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/peka/nextflow.config
tags:
- peka
files:
# Output names start with the input file stem, not the meta id.
- path: output/peka/chr21_HepG2-PCBP1-merged_4mer_cluster_distribution_genome.tsv
- path: output/peka/chr21_HepG2-PCBP1-merged_4mer_distribution_genome.tsv
- path: output/peka/chr21_HepG2-PCBP1-merged_4mer_genome.pdf
- path: output/peka/versions.yml
19 changes: 19 additions & 0 deletions tests/modules/nf-core/salmon/quant/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,22 @@ workflow test_salmon_quant_single_end_lib_type_A {

}

// Paired-end quantification with more than one FastQ pair for a single sample.
workflow test_salmon_quant_paired_end_multiple {

    def illumina = params.test_data['sarscov2']['illumina']
    def genome   = params.test_data['sarscov2']['genome']

    // Four FastQ files in the order test_1, test_2, test2_1, test2_2
    // (presumably R1/R2 of the first pair followed by R1/R2 of the second).
    def read_keys = [ 'test_1_fastq_gz', 'test_2_fastq_gz', 'test2_1_fastq_gz', 'test2_2_fastq_gz' ]
    def fastqs    = read_keys.collect { key -> file(illumina[key], checkIfExists: true) }

    input = [
        [ id:'test', single_end:false ], // meta map
        fastqs
    ]

    genome_fasta     = file(genome['genome_fasta'], checkIfExists: true)
    transcript_fasta = file(genome['transcriptome_fasta'], checkIfExists: true)
    gtf              = file(genome['genome_gtf'], checkIfExists: true)

    // Build the index first, then quantify against it.
    SALMON_INDEX ( genome_fasta, transcript_fasta )
    SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false, '' )

}
55 changes: 54 additions & 1 deletion tests/modules/nf-core/salmon/quant/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@
md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
- path: ./output/salmon/salmon/ctg_offsets.bin
md5sum: 27a76542337df436436e66017f66dd25

- path: ./output/salmon/salmon/rank.bin
md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
- path: ./output/salmon/salmon/pos.bin
Expand Down Expand Up @@ -149,3 +148,57 @@
md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
- path: ./output/salmon/salmon/pos.bin
- path: ./output/salmon/salmon/seq.bin

# Test case for the multi-pair paired-end salmon quant workflow.
# Files under output/salmon/salmon/ are listed first, followed by files under
# output/salmon/test/ and the versions file.
# Entries that carry only a `path` have no checksum recorded for them.
- name: salmon quant test_salmon_quant_paired_end_multiple
command: nextflow run ./tests/modules/nf-core/salmon/quant -entry test_salmon_quant_paired_end_multiple -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/salmon/quant/nextflow.config
tags:
- salmon/quant
- salmon
files:
- path: output/salmon/salmon/complete_ref_lens.bin
md5sum: f57562f1fca3ae7b133f895ae13c3d08
- path: output/salmon/salmon/ctable.bin
- path: output/salmon/salmon/ctg_offsets.bin
md5sum: 27a76542337df436436e66017f66dd25
- path: output/salmon/salmon/duplicate_clusters.tsv
md5sum: 51b5292e3a874119c0e1aa566e95d70c
- path: output/salmon/salmon/info.json
md5sum: 61ff4d3471134c280668355ddd39e99f
- path: output/salmon/salmon/mphf.bin
md5sum: 53669a47610e33e031faafd32703b714
- path: output/salmon/salmon/pos.bin
- path: output/salmon/salmon/pre_indexing.log
- path: output/salmon/salmon/rank.bin
md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
- path: output/salmon/salmon/refAccumLengths.bin
md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
- path: output/salmon/salmon/ref_indexing.log
- path: output/salmon/salmon/reflengths.bin
md5sum: f57562f1fca3ae7b133f895ae13c3d08
- path: output/salmon/salmon/refseq.bin
md5sum: 79c4ddf34be3a98d5a7b9d153629a6f7
- path: output/salmon/salmon/seq.bin
- path: output/salmon/salmon/versionInfo.json
md5sum: 8126856d616d41d63aebd11a440b5b5b
- path: output/salmon/test/aux_info/ambig_info.tsv
md5sum: 1067793c186f56621165add136987d7f
- path: output/salmon/test/aux_info/expected_bias.gz
md5sum: 24ee10af39b41ecf4f4e08faaaf537ee
- path: output/salmon/test/aux_info/fld.gz
- path: output/salmon/test/aux_info/meta_info.json
- path: output/salmon/test/aux_info/observed_bias.gz
md5sum: ef13c06a538e9c34ca9f84212c82f44e
- path: output/salmon/test/aux_info/observed_bias_3p.gz
md5sum: ef13c06a538e9c34ca9f84212c82f44e
- path: output/salmon/test/cmd_info.json
md5sum: 3284f2f259dad6c271d0e0e047854c4f
- path: output/salmon/test/libParams/flenDist.txt
md5sum: 62ced80c88aa784e3019a8ba7ca20236
- path: output/salmon/test/lib_format_counts.json
md5sum: 20bffb7bef3ffbae97968b85abf8bc14
- path: output/salmon/test/logs/salmon_quant.log
- path: output/salmon/test/quant.genes.sf
md5sum: 506c564cadf9c6572eb46c92c1d2a075
- path: output/salmon/test/quant.sf
md5sum: 37ebc8512a158ecdbf996e324eee226d
- path: output/salmon/versions.yml

0 comments on commit 7665ef3

Please sign in to comment.