diff --git a/conf/modules.config b/conf/modules.config index b226ba01..0adc8846 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -192,17 +192,40 @@ process { "--min_length ${params.longreads_min_length}", "--keep_percent ${params.longreads_keep_percent}", "--trim", - "--length_weight ${params.longreads_length_weight}" + "--length_weight ${params.longreads_length_weight}", + params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '', ].join(' ').trim() publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtlong_reads + enabled: params.save_filtered_reads ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } + withName: NANOQ { + ext.args = [ + "--min-len ${params.longreads_min_length}", + params.longreads_min_quality ? "--min-qual ${params.longreads_min_quality}": '', + "-vv" + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.fastq.gz", + enabled: params.save_filtered_reads + ], + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.stats" + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_nanoq_filtered" } + } + withName: NANOLYSE { publishDir = [ [ @@ -220,6 +243,28 @@ process { ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } + withName: CHOPPER { + ext.args2 = [ + !params.keep_lambda ? "--contam ${params.lambda_reference}": "", + params.longreads_min_quality ? "--quality ${params.longreads_min_quality}": "", + params.longreads_min_length ? "--minlength ${params.longreads_min_length}": "", + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*_chopper.fastq.gz", + enabled: params.save_lambdaremoved_reads + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_chopper" } + } + withName: NANOPLOT_RAW { ext.prefix = 'raw' ext.args = { diff --git a/modules.json b/modules.json index a72556a7..99e6d2e2 100644 --- a/modules.json +++ b/modules.json @@ -62,6 +62,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "chopper": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", @@ -207,6 +212,11 @@ "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", "installed_by": ["modules"] }, + "nanoq": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "porechop/abi": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", diff --git a/modules/nf-core/chopper/environment.yml b/modules/nf-core/chopper/environment.yml new file mode 100644 index 00000000..e80840e1 --- /dev/null +++ b/modules/nf-core/chopper/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::chopper=0.3.0 diff --git a/modules/nf-core/chopper/main.nf b/modules/nf-core/chopper/main.nf new file mode 100644 index 00000000..06f79849 --- /dev/null +++ b/modules/nf-core/chopper/main.nf @@ -0,0 +1,42 @@ +process CHOPPER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0': + 'biocontainers/chopper:0.3.0--hd03093a_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fastq.gz") , emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + zcat \\ + $args \\ + $fastq | \\ + chopper \\ + --threads $task.cpus \\ + $args2 | \\ + gzip \\ + $args3 > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/chopper/meta.yml b/modules/nf-core/chopper/meta.yml new file mode 100644 index 00000000..916c865e --- /dev/null +++ b/modules/nf-core/chopper/meta.yml @@ -0,0 +1,60 @@ +name: "chopper" +description: Filter and trim long read data. +keywords: + - filter + - trimming + - fastq + - nanopore + - qc +tools: + - "zcat": + description: "zcat uncompresses either a list of files on the command line or + its standard input and writes the uncompressed data on standard output." + documentation: "https://linux.die.net/man/1/zcat" + args_id: "$args" + identifier: "" + - "chopper": + description: "A rust command line for filtering and trimming long reads." + homepage: "https://github.com/wdecoster/chopper" + documentation: "https://github.com/wdecoster/chopper" + tool_dev_url: "https://github.com/wdecoster/chopper" + doi: "10.1093/bioinformatics/bty149" + licence: ["MIT"] + args_id: "$args2" + identifier: "" + - "gzip": + description: "Gzip reduces the size of the named files using Lempel-Ziv coding + (LZ77)." + documentation: "https://linux.die.net/man/1/gzip" + args_id: "$args3" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: FastQ with reads from long read sequencing e.g. PacBio or ONT + pattern: "*.{fastq.gz}" +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Filtered and trimmed FastQ file + pattern: "*.{fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FynnFreyer" +maintainers: + - "@FynnFreyer" diff --git a/modules/nf-core/chopper/tests/main.nf.test b/modules/nf-core/chopper/tests/main.nf.test new file mode 100644 index 00000000..ee195b5f --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test @@ -0,0 +1,45 @@ +nextflow_process { + + name "Test Process CHOPPER" + script "../main.nf" + process "CHOPPER" + tag "chopper" + tag "modules" + tag "modules_nfcore" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test_out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + + def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip + + assertAll( + { assert process.success }, + // original pytest checks + { assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" }, + { assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")}, + // additional nf-test checks + // Order of reads is not deterministic, so only assess whether the number of reads is correct + { assert snapshot(fastq_content.size()).match("number_of_lines") }, + { assert snapshot(process.out.versions).match("versions") } + + ) + } + + } + +} diff --git a/modules/nf-core/chopper/tests/main.nf.test.snap b/modules/nf-core/chopper/tests/main.nf.test.snap new file mode 100644 index 00000000..d2587e66 --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5fe28ea455482c9fe88603ddcc461881" + ] + ], + "timestamp": "2023-10-20T08:27:24.592662298" + }, + "number_of_lines": { + "content": [ + 400 + ], + "timestamp": "2023-10-20T08:27:24.581289647" + } +} \ No newline at end of file diff --git a/modules/nf-core/chopper/tests/tags.yml b/modules/nf-core/chopper/tests/tags.yml new file mode 100644 index 00000000..89b6233b --- /dev/null +++ b/modules/nf-core/chopper/tests/tags.yml @@ -0,0 +1,2 @@ +chopper: + - modules/nf-core/chopper/** diff --git a/modules/nf-core/nanoq/environment.yml b/modules/nf-core/nanoq/environment.yml new file mode 100644 index 00000000..1a95d24e --- /dev/null +++ b/modules/nf-core/nanoq/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::nanoq=0.10.0" diff --git a/modules/nf-core/nanoq/main.nf b/modules/nf-core/nanoq/main.nf new file mode 100644 index 00000000..6d35a407 --- /dev/null +++ b/modules/nf-core/nanoq/main.nf @@ -0,0 +1,49 @@ +process NANOQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/nanoq:0.10.0--h031d066_2' : + 'biocontainers/nanoq:0.10.0--h031d066_2'}" + + input: + tuple val(meta), path(ontreads) + val(output_format) //One of the following: fastq, fastq.gz, fastq.bz2, fastq.lzma, fasta, fasta.gz, fasta.bz2, fasta.lzma. + + output: + tuple val(meta), path("*.{stats,json}") , emit: stats + tuple val(meta), path("*_filtered.${output_format}") , emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + nanoq -i $ontreads \\ + ${args} \\ + -r ${prefix}.stats \\ + -o ${prefix}.$output_format + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + echo "" | gzip > ${prefix}.$output_format + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanoq/meta.yml b/modules/nf-core/nanoq/meta.yml new file mode 100644 index 00000000..0ff2b9b4 --- /dev/null +++ b/modules/nf-core/nanoq/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "nanoq" +description: Nanoq implements ultra-fast read filters and summary reports for high-throughput + nanopore reads. +keywords: + - nanoq + - Read filters + - Read trimming + - Read report +tools: + - "nanoq": + description: "Ultra-fast quality control and summary reports for nanopore reads" + homepage: "https://github.com/esteinig/nanoq" + documentation: "https://github.com/esteinig/nanoq" + tool_dev_url: "https://github.com/esteinig/nanoq" + doi: "10.21105/joss.02991" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ontreads: + type: file + description: Compressed or uncompressed nanopore reads in fasta or fastq formats. + pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz,.bz2,.xz}" + - - output_format: + type: string + description: "Specifies the output format. One of these formats: fasta, fastq; + fasta.gz, fastq.gz; fasta.bz2, fastq.bz2; fasta.lzma, fastq.lzma." +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.{stats,json}": + type: file + description: Summary report of reads statistics. + pattern: "*.{stats,json}" + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*_filtered.${output_format}": + type: file + description: Filtered reads. + pattern: "*.{fasta,fastq}{,.gz,.bz2,.lzma}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LilyAnderssonLee" +maintainers: + - "@LilyAnderssonLee" diff --git a/modules/nf-core/nanoq/tests/main.nf.test b/modules/nf-core/nanoq/tests/main.nf.test new file mode 100644 index 00000000..ef63d12f --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process NANOQ" + script "../main.nf" + process "NANOQ" + + tag "modules" + tag "modules_nfcore" + tag "nanoq" + + test("sarscov2 - nanopore_uncompressed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - nanopore_compressed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.bz2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_lzma") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.lzma' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - nanopore_compressed_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/nanoq/tests/main.nf.test.snap b/modules/nf-core/nanoq/tests/main.nf.test.snap new file mode 100644 index 00000000..b5dda2a7 --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test.snap @@ -0,0 +1,267 @@ +{ + "sarscov2 - nanopore_compressed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:32.117229" + }, + "sarscov2 - nanopore_compressed_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:42:06.039307" + }, + "sarscov2 - nanopore_compressed_bz2": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:36.674647" + }, + "sarscov2 - nanopore_compressed_lzma": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:41.51344" + }, + "sarscov2 - nanopore_uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:26.868897" + } +} \ No newline at end of file diff --git a/modules/nf-core/nanoq/tests/tags.yml b/modules/nf-core/nanoq/tests/tags.yml new file mode 100644 index 00000000..37457df1 --- /dev/null +++ b/modules/nf-core/nanoq/tests/tags.yml @@ -0,0 +1,2 @@ +nanoq: + - "modules/nf-core/nanoq/**" diff --git a/nextflow.config b/nextflow.config index b6d281d0..f8bf1a4e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,8 @@ params { adapterremoval_trim_quality_stretch = false keep_phix = false // long read preprocessing options - longread_adaptertrimming_tool = "porechop_abi" + longread_adaptertrimming_tool = "porechop_abi" + longread_filtering_tool = "filtlong" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false @@ -102,6 +103,7 @@ params { // long read preprocessing options skip_adapter_trimming = false keep_lambda = false + longreads_min_quality = null longreads_min_length = 1000 longreads_keep_percent = 90 longreads_length_weight = 10 @@ -109,7 +111,7 @@ params { lambda_reference = "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" save_lambdaremoved_reads = false save_porechop_reads = false - save_filtlong_reads = false + save_filtered_reads = false // binning options skip_metabat2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b4809d15..38faad2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -453,6 +453,11 @@ "default": 1000, "description": "Discard any read which is shorter than this value." }, + "longreads_min_quality": { + "type": "integer", + "default": null, + "description": "Discard any read which has a mean quality score lower than this value." + }, "longreads_keep_percent": { "type": "integer", "default": 90, @@ -482,7 +487,7 @@ "type": "boolean", "description": "Specify to save the resulting clipped FASTQ files to --outdir." }, - "save_filtlong_reads": { + "save_filtered_reads": { "type": "boolean", "description": "Specify to save the resulting length filtered FASTQ files to --outdir." }, @@ -491,6 +496,12 @@ "description": "Specify which long read adapter trimming tool to use.", "enum": ["porechop", "porechop_abi"], "default": "porechop_abi" + }, + "longread_filtering_tool": { + "type": "string", + "description": "Specify which long read filtering tool to use.", + "enum": ["filtlong", "nanoq", "chopper"], + "default": "filtlong" } } }, diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index ec434858..76865c25 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -8,6 +8,8 @@ include { NANOLYSE } from '../../mo include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main' include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' include { FILTLONG } from '../../modules/nf-core/filtlong' +include { CHOPPER } from '../../modules/nf-core/chopper' +include { NANOQ } from '../../modules/nf-core/nanoq' workflow LONGREAD_PREPROCESSING { take: @@ -51,7 +53,7 @@ workflow LONGREAD_PREPROCESSING { } } - if (!params.keep_lambda) { + if (!params.keep_lambda && params.longread_filtering_tool != 'chopper') { NANOLYSE ( ch_long_reads, ch_nanolyse_db @@ -60,21 +62,37 @@ workflow LONGREAD_PREPROCESSING { ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) } - // join long and short reads by sample name - ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } + if (params.longread_filtering_tool == 'filtlong') { + // join long and short reads by sample name + ch_short_reads_tmp = ch_short_reads + .map { meta, sr -> [ meta.id, meta, sr ] } - ch_short_and_long_reads = ch_long_reads - .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + ch_short_and_long_reads = ch_long_reads + .map { meta, lr -> [ meta.id, meta, lr ] } + .join(ch_short_reads_tmp, by: 0) + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end - FILTLONG ( - ch_short_and_long_reads - ) - ch_long_reads = FILTLONG.out.reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + FILTLONG ( + ch_short_and_long_reads + ) + ch_long_reads = FILTLONG.out.reads + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + } else if (params.longread_filtering_tool == 'nanoq') { + NANOQ ( + ch_long_reads, + 'fastq.gz' + ) + ch_long_reads = NANOQ.out.reads + ch_versions = ch_versions.mix(NANOQ.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) + } else if (params.longread_filtering_tool == 'chopper') { + CHOPPER ( + ch_long_reads + ) + ch_long_reads = CHOPPER.out.fastq + ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) + } NANOPLOT_FILTERED ( ch_long_reads