Patch summary (nf-core/modules); free text before the first diff header is ignored by git apply.
  * metaphlan/metaphlan: derive --input_type from file extensions (exact .bowtie2out.txt/.sam
    suffixes are tested before the unanchored fastq/fasta regexes) and pass the Bowtie2 index
    name found in the database directory via --index.
  * peka: new module (main.nf, meta.yml) plus its test workflow, config and expected outputs.
  * salmon/quant: accept several single-end FASTQs, or several interleaved R1/R2 pairs, per sample.

diff --git a/modules/nf-core/metaphlan/metaphlan/main.nf b/modules/nf-core/metaphlan/metaphlan/main.nf
index 15bd428581a..477f1f28ddd 100644
--- a/modules/nf-core/metaphlan/metaphlan/main.nf
+++ b/modules/nf-core/metaphlan/metaphlan/main.nf
@@ -23,12 +23,14 @@ process METAPHLAN_METAPHLAN {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
+    // Exact suffixes are tested first: the extension regexes are unanchored, so e.g. sample.fastq.bowtie2out.txt would otherwise be classified as fastq
+    def input_type = "$input".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "$input".endsWith(".sam") ? "--input_type sam" : "$input" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "$input" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "--input_type sam"
     def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
     def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
 
     """
     BT2_DB=`find -L "${metaphlan_db_latest}" -name "*rev.1.bt2l" -exec dirname {} \\;`
+    BT2_DB_INDEX=`find -L "${metaphlan_db_latest}" -name "*.rev.1.bt2l" | sed 's/\\.rev.1.bt2l\$//' | sed 's/.*\\///'`
 
     metaphlan \\
         --nproc $task.cpus \\
@@ -37,6 +39,7 @@ process METAPHLAN_METAPHLAN {
         $args \\
         $bowtie2_out \\
         --bowtie2db \$BT2_DB \\
+        --index \$BT2_DB_INDEX \\
         --biom ${prefix}.biom \\
         --output_file ${prefix}_profile.txt
 
diff --git a/modules/nf-core/peka/main.nf b/modules/nf-core/peka/main.nf
new file mode 100644
index 00000000000..416a44a80cc
--- /dev/null
+++ b/modules/nf-core/peka/main.nf
@@ -0,0 +1,59 @@
+process PEKA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::peka=1.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/peka:1.0.0--pyhdfd78af_0':
+        'biocontainers/peka:1.0.0--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(peaks)
+    tuple val(meta), path(crosslinks)
+    path fasta
+    path fai
+    path gtf
+
+    output:
+    tuple val(meta), path("*mer_cluster_distribution*"), emit: cluster, optional: true
+    tuple val(meta), path("*mer_distribution*") , emit: distribution, optional: true
+    tuple val(meta), path("*.pdf") , emit: pdf, optional: true
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def VERSION = '1.0.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    # If the modification date and time of the fai is before the fasta then
+    # there will be an error. Touching the file first avoids that.
+    touch $fai
+    mkdir -p tmp
+    TMPDIR=\$(pwd)/tmp peka \\
+        -i $peaks \\
+        -x $crosslinks \\
+        -g $fasta \\
+        -gi $fai \\
+        -r $gtf \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        peka: $VERSION
+    END_VERSIONS
+    """
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    touch ${prefix}_4mer_cluster_distribution_genome.tsv
+    touch ${prefix}_4mer_distribution_genome.tsv
+    touch ${prefix}_4mer_genome.pdf
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        peka: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/peka/meta.yml b/modules/nf-core/peka/meta.yml
new file mode 100644
index 00000000000..679e26362de
--- /dev/null
+++ b/modules/nf-core/peka/meta.yml
@@ -0,0 +1,68 @@
+---
+name: "peka"
+description: Runs PEKA CLIP peak k-mer analysis
+keywords:
+  - motif
+  - CLIP
+  - iCLIP
+  - genomics
+  - k-mer
+tools:
+  - "peka":
+      description: "Positionally-enriched k-mer analysis (PEKA) is a software package for identifying enriched protein-RNA binding motifs from CLIP datasets"
+      homepage: "https://github.com/ulelab/peka"
+      documentation: "https://github.com/ulelab/peka"
+      tool_dev_url: "https://github.com/ulelab/peka"
+      doi: "10.1186/s13059-022-02755-2"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - peaks:
+      type: file
+      description: BED file of peak regions
+      pattern: "*.{bed,bed.gz}"
+  - crosslinks:
+      type: file
+      description: BED file of crosslinks
+      pattern: "*.{bed,bed.gz}"
+  - fasta:
+      type: file
+      description: Genome reference sequence used
+      pattern: "*.{fa,fasta}"
+  - fai:
+      type: file
+      description: FAI file corresponding to the reference sequence
+      pattern: "*.{fai}"
+  - gtf:
+      type: file
+      description: A segmented GTF used to annotate peaks
+      pattern: "*.{gtf}"
+
+output:
+  - cluster:
+      type: file
+      description: TSV file of summed occurrence distributions of k-mers within defined clusters
+      pattern: "*.tsv"
+  - distribution:
+      type: file
+      description: TSV file with calculated PEKA score and occurrence distribution for all possible k-mers
+      pattern: "*.tsv"
+  - pdf:
+      type: file
+      description: PDF file with graphs showing k-mer occurrence distributions around thresholded crosslink sites
+      pattern: "*.pdf"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@kkuret"
+  - "@codeprimate123"
+  - "@chris-cheshire"
+  - "@charlotteanne"
diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf
index e356af26d7c..f11b6c2e267 100644
--- a/modules/nf-core/salmon/quant/main.nf
+++ b/modules/nf-core/salmon/quant/main.nf
@@ -28,7 +28,10 @@ process SALMON_QUANT {
     prefix = task.ext.prefix ?: "${meta.id}"
 
     def reference = "--index $index"
-    def input_reads = meta.single_end ? "-r $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
+    // Single-end: every file goes to reads1 (-r). Paired-end: files are split by position, even index -> reads1 (-1), odd index -> reads2 (-2)
+    def reads1 = [], reads2 = []
+    meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
+    def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}"
     if (alignment_mode) {
         reference = "-t $transcript_fasta"
         input_reads = "-a $reads"
diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml
index ea01e0df8ce..e809ade2b56 100644
--- a/modules/nf-core/salmon/quant/meta.yml
+++ b/modules/nf-core/salmon/quant/meta.yml
@@ -22,8 +22,9 @@ input:
   - reads:
       type: file
       description: |
-        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
-        respectively.
+        List of input FastQ files for single-end or paired-end data.
+        Multiple single-end fastqs or pairs of paired-end fastqs are
+        handled.
   - index:
       type: directory
       description: Folder containing the star index files
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index ed03d8c39c5..4c8b6103644 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2651,6 +2651,10 @@ peddy:
   - modules/nf-core/peddy/**
   - tests/modules/nf-core/peddy/**
 
+peka:
+  - modules/nf-core/peka/**
+  - tests/modules/nf-core/peka/**
+
 phantompeakqualtools:
   - modules/nf-core/phantompeakqualtools/**
   - tests/modules/nf-core/phantompeakqualtools/**
diff --git a/tests/modules/nf-core/metaphlan/metaphlan/main.nf b/tests/modules/nf-core/metaphlan/metaphlan/main.nf
index 6e48a94a065..3000feb9062 100644
--- a/tests/modules/nf-core/metaphlan/metaphlan/main.nf
+++ b/tests/modules/nf-core/metaphlan/metaphlan/main.nf
@@ -53,7 +53,7 @@ workflow test_metaphlan_sam {
     db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan4_database.tar.gz', checkIfExists: true) ]
 
     UNTAR ( db )
-    SAMTOOLS_VIEW ( input, [] ,[])
+    SAMTOOLS_VIEW ( input, [[],[]], [])
     METAPHLAN_METAPHLAN ( SAMTOOLS_VIEW.out.sam, UNTAR.out.untar.map{ it[1] } )
 }
 
diff --git a/tests/modules/nf-core/peka/main.nf b/tests/modules/nf-core/peka/main.nf
new file mode 100644
index 00000000000..99d0ec70600
--- /dev/null
+++ b/tests/modules/nf-core/peka/main.nf
@@ -0,0 +1,22 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PEKA } from '../../../../modules/nf-core/peka/main.nf'
+
+workflow test_peka {
+
+    bed_crosslinks = [ [ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_HepG2-PCBP1-merged.xl.bed", checkIfExists: true) ]
+    bed_peaks = [ [ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_HepG2-PCBP1-merged.xl10_200_density2_peaks.bed", checkIfExists: true) ]
+    regions = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21_gencode_regions.gtf", checkIfExists: true)
+    fasta = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21.GRCh38.p12.genome.masked.fa", checkIfExists: true)
+    fai = file("https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/peka/chr21.GRCh38.p12.genome.masked.fa.fai", checkIfExists: true)
+
+    PEKA(
+        bed_peaks,
+        bed_crosslinks,
+        fasta,
+        fai,
+        regions
+    )
+}
diff --git a/tests/modules/nf-core/peka/nextflow.config b/tests/modules/nf-core/peka/nextflow.config
new file mode 100644
index 00000000000..e99ffa5b24c
--- /dev/null
+++ b/tests/modules/nf-core/peka/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: 'PEKA' {
+        ext.args = { "-sr 'genome' -re 'unmasked' -k 4 -p 0" }
+    }
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+}
diff --git a/tests/modules/nf-core/peka/test.yml b/tests/modules/nf-core/peka/test.yml
new file mode 100644
index 00000000000..3054413559f
--- /dev/null
+++ b/tests/modules/nf-core/peka/test.yml
@@ -0,0 +1,9 @@
+- name: peka test_peka
+  command: nextflow run ./tests/modules/nf-core/peka -entry test_peka -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/peka/nextflow.config
+  tags:
+    - peka
+  files:
+    - path: output/peka/chr21_HepG2-PCBP1-merged_4mer_cluster_distribution_genome.tsv
+    - path: output/peka/chr21_HepG2-PCBP1-merged_4mer_distribution_genome.tsv
+    - path: output/peka/chr21_HepG2-PCBP1-merged_4mer_genome.pdf
+    - path: output/peka/versions.yml
diff --git a/tests/modules/nf-core/salmon/quant/main.nf b/tests/modules/nf-core/salmon/quant/main.nf
index 5f402193db5..173e83fac83 100644
--- a/tests/modules/nf-core/salmon/quant/main.nf
+++ b/tests/modules/nf-core/salmon/quant/main.nf
@@ -57,3 +57,22 @@ workflow test_salmon_quant_single_end_lib_type_A {
 
 }
 
+workflow test_salmon_quant_paired_end_multiple {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    transcript_fasta = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)
+    gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
+
+    SALMON_INDEX ( genome_fasta, transcript_fasta )
+    SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false, '' )
+
+}
diff --git a/tests/modules/nf-core/salmon/quant/test.yml b/tests/modules/nf-core/salmon/quant/test.yml
index 2924b512b7e..e8598934343 100644
--- a/tests/modules/nf-core/salmon/quant/test.yml
+++ b/tests/modules/nf-core/salmon/quant/test.yml
@@ -95,7 +95,6 @@
       md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
     - path: ./output/salmon/salmon/ctg_offsets.bin
       md5sum: 27a76542337df436436e66017f66dd25
-
     - path: ./output/salmon/salmon/rank.bin
       md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
     - path: ./output/salmon/salmon/pos.bin
@@ -149,3 +148,57 @@
       md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
     - path: ./output/salmon/salmon/pos.bin
     - path: ./output/salmon/salmon/seq.bin
+
+- name: salmon quant test_salmon_quant_paired_end_multiple
+  command: nextflow run ./tests/modules/nf-core/salmon/quant -entry test_salmon_quant_paired_end_multiple -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/salmon/quant/nextflow.config
+  tags:
+    - salmon/quant
+    - salmon
+  files:
+    - path: output/salmon/salmon/complete_ref_lens.bin
+      md5sum: f57562f1fca3ae7b133f895ae13c3d08
+    - path: output/salmon/salmon/ctable.bin
+    - path: output/salmon/salmon/ctg_offsets.bin
+      md5sum: 27a76542337df436436e66017f66dd25
+    - path: output/salmon/salmon/duplicate_clusters.tsv
+      md5sum: 51b5292e3a874119c0e1aa566e95d70c
+    - path: output/salmon/salmon/info.json
+      md5sum: 61ff4d3471134c280668355ddd39e99f
+    - path: output/salmon/salmon/mphf.bin
+      md5sum: 53669a47610e33e031faafd32703b714
+    - path: output/salmon/salmon/pos.bin
+    - path: output/salmon/salmon/pre_indexing.log
+    - path: output/salmon/salmon/rank.bin
+      md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
+    - path: output/salmon/salmon/refAccumLengths.bin
+      md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
+    - path: output/salmon/salmon/ref_indexing.log
+    - path: output/salmon/salmon/reflengths.bin
+      md5sum: f57562f1fca3ae7b133f895ae13c3d08
+    - path: output/salmon/salmon/refseq.bin
+      md5sum: 79c4ddf34be3a98d5a7b9d153629a6f7
+    - path: output/salmon/salmon/seq.bin
+    - path: output/salmon/salmon/versionInfo.json
+      md5sum: 8126856d616d41d63aebd11a440b5b5b
+    - path: output/salmon/test/aux_info/ambig_info.tsv
+      md5sum: 1067793c186f56621165add136987d7f
+    - path: output/salmon/test/aux_info/expected_bias.gz
+      md5sum: 24ee10af39b41ecf4f4e08faaaf537ee
+    - path: output/salmon/test/aux_info/fld.gz
+    - path: output/salmon/test/aux_info/meta_info.json
+    - path: output/salmon/test/aux_info/observed_bias.gz
+      md5sum: ef13c06a538e9c34ca9f84212c82f44e
+    - path: output/salmon/test/aux_info/observed_bias_3p.gz
+      md5sum: ef13c06a538e9c34ca9f84212c82f44e
+    - path: output/salmon/test/cmd_info.json
+      md5sum: 3284f2f259dad6c271d0e0e047854c4f
+    - path: output/salmon/test/libParams/flenDist.txt
+      md5sum: 62ced80c88aa784e3019a8ba7ca20236
+    - path: output/salmon/test/lib_format_counts.json
+      md5sum: 20bffb7bef3ffbae97968b85abf8bc14
+    - path: output/salmon/test/logs/salmon_quant.log
+    - path: output/salmon/test/quant.genes.sf
+      md5sum: 506c564cadf9c6572eb46c92c1d2a075
+    - path: output/salmon/test/quant.sf
+      md5sum: 37ebc8512a158ecdbf996e324eee226d
+    - path: output/salmon/versions.yml