#!/usr/bin/env python
"""Convert an ``.assembly`` contig table into a BEDPE annotation on stdout.

Usage: ``assembly2bedpe.py <file.assembly>``

Every contig record of the assembly file becomes one BEDPE feature whose
coordinates are the contig's span on the concatenated ("assembly")
pseudo-chromosome.
"""

import sys

import pandas as pd


def read_assembly_file_cols(assembly_file_name):
    """Read the contig records of an assembly file into a DataFrame.

    A contig record is a line with exactly three whitespace-separated
    fields whose first field starts with ``>``: ``>name number length``.
    All other lines are ignored.  This includes scaffold-order lines made
    of bare integers, which would otherwise be mistaken for a record
    whenever a scaffold happens to list exactly three entries.

    Parameters
    ----------
    assembly_file_name : str or path-like
        Path of the assembly file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` (str, leading ``>`` kept), ``number`` (int64) and
        ``length`` (int64), in file order.  The frame has these columns
        even when the file contains no contig record.

    Raises
    ------
    ValueError
        If the second or third field of a contig record is not an integer.
    """
    records = []
    with open(assembly_file_name, "r") as file:
        for line in file:
            # split() without a separator tolerates tabs, repeated spaces
            # and a trailing "\r\n".
            fields = line.split()
            if len(fields) != 3 or not fields[0].startswith(">"):
                continue
            records.append((fields[0], int(fields[1]), int(fields[2])))

    # Naming the columns in the constructor keeps an empty result valid;
    # assigning ``df.columns`` afterwards fails on a frame without columns.
    df = pd.DataFrame(records, columns=["name", "number", "length"])
    return df.astype({"number": "int64", "length": "int64"})


def make_bedpe_cols(assembly_file_pd):
    """Return a copy of the contig table with cumulative coordinates added.

    Parameters
    ----------
    assembly_file_pd : pandas.DataFrame
        Table with at least a numeric ``length`` column, one row per
        contig, in assembly order.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        The input columns plus ``cum_length`` (running sum of ``length``),
        ``end_index`` (``cum_length - 1``) and ``start_index`` (the
        previous row's ``end_index + 1``, 0 for the first row).
    """
    bedpe = assembly_file_pd.copy()
    bedpe["cum_length"] = bedpe["length"].cumsum()
    bedpe["end_index"] = bedpe["cum_length"] - 1
    # Same value as shifting end_index down one row and adding 1.
    bedpe["start_index"] = bedpe["cum_length"] - bedpe["length"]

    return bedpe


def print_bed_pe_file(bed_pe_df):
    """Print the table produced by ``make_bedpe_cols`` as BEDPE text.

    Writes a header line followed by one tab-separated line per row to
    stdout.  Both anchors of each feature are the same interval on the
    ``assembly`` pseudo-chromosome, and ``>`` is removed from the name.

    Parameters
    ----------
    bed_pe_df : pandas.DataFrame
        Table with ``name``, ``start_index`` and ``end_index`` columns.
    """
    print("chr1\tx1\tx2\tchr2\ty1\ty2\tname\tscore\tstrand1\tstrand2\tcolor")
    rows = zip(bed_pe_df["name"], bed_pe_df["start_index"], bed_pe_df["end_index"])
    for name, start, end in rows:
        print(
            f"assembly\t{start}\t{end}\tassembly\t{start}\t{end}\t{name.replace('>', '')}\t.\t.\t.\t0,0,255"
        )


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: assembly2bedpe.py <file.assembly>")

    assembly_file_name = sys.argv[1]

    assembly_file_cols = read_assembly_file_cols(assembly_file_name)
    print_bed_pe_file(make_bedpe_cols(assembly_file_cols))
#!/usr/bin/env python
"""Fill the HiC HTML report template for one ``.hic`` file.

Usage: ``hic2html.py <file.hic>``

The filled template is written to stdout.
"""

import os
import sys
from pathlib import Path

# Location of the template relative to the directory holding this script.
TEMPLATE_RELATIVE_PATH = Path("report_modules/templates/hic/hic_html_template.html")
HIC_SUFFIX = ".hic"


def fill_hic_template(template_text, hic_file_name):
    """Substitute the file-name placeholders of the HiC HTML template.

    ``HIC_FILE_NAME`` is replaced by ``hic_file_name`` and
    ``BEDPE_FILE_NAME`` by the same name with a trailing ``.hic`` swapped
    for ``.assembly.bedpe``.  Only the extension is removed; a ``.hic``
    occurring earlier in the name is kept.

    Parameters
    ----------
    template_text : str
        Content of the HTML template.
    hic_file_name : str
        File name (no directory part) of the ``.hic`` file.

    Returns
    -------
    str
        The template with both placeholders substituted.
    """
    if hic_file_name.endswith(HIC_SUFFIX):
        stem = hic_file_name[: -len(HIC_SUFFIX)]
    else:
        stem = hic_file_name

    return template_text.replace("HIC_FILE_NAME", hic_file_name).replace(
        "BEDPE_FILE_NAME",
        f"{stem}.assembly.bedpe",
    )


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: hic2html.py <file.hic>")

    hic_file_name = os.path.basename(sys.argv[1])

    # Path.parent yields "." for a bare script name; splitting __file__ on
    # "/" yielded "" there, which pointed the template path at the
    # filesystem root.
    html_template_path = Path(__file__).parent / TEMPLATE_RELATIVE_PATH

    html_file_lines = html_template_path.read_text(encoding="utf-8")

    print(fill_hic_template(html_file_lines, hic_file_name))
+
+
+ + + diff --git a/conf/base.config b/conf/base.config index e683193..9f5c47e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -61,6 +61,12 @@ process { withName:NCBI_FCS_GX_SCREEN_SAMPLES { memory = { check_max( 512.GB * task.attempt, 'memory' ) } } + withName:BWA_MEM { + time = { check_max( 2.day * task.attempt, 'time' ) } + } + withName:SAMBLASTER { + time = { check_max( 20.h * task.attempt, 'time' ) } + } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/modules.json b/modules.json index d68ea0e..a57849e 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,16 @@ "git@github.com:PlantandFoodResearch/nxf-modules.git": { "modules": { "pfr": { + "bwa/index": { + "branch": "main", + "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", + "installed_by": ["fastq_bwa_mem_samblaster"] + }, + "bwa/mem": { + "branch": "main", + "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", + "installed_by": ["fastq_bwa_mem_samblaster"] + }, "cat/cat": { "branch": "main", "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", @@ -59,6 +69,11 @@ "branch": "main", "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", "installed_by": ["fasta_ltrretriever_lai"] + }, + "samblaster": { + "branch": "main", + "git_sha": "73358a6712178b9a67c39f92e65e8144b5880eae", + "installed_by": ["fastq_bwa_mem_samblaster"] } } }, @@ -69,6 +84,11 @@ "git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d", "installed_by": ["subworkflows"] }, + "fastq_bwa_mem_samblaster": { + "branch": "main", + "git_sha": "9639ac9a556898d0f0e8592bff24585c33326458", + "installed_by": ["subworkflows"] + }, "gff3_validate": { "branch": "main", "git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9", diff --git a/modules/local/agp2assembly.nf b/modules/local/agp2assembly.nf new file mode 100644 index 0000000..edb60d9 --- /dev/null +++ b/modules/local/agp2assembly.nf @@ -0,0 +1,19 @@ +process AGP2ASSEMBLY { + tag "$sample_id_on_tag" + label 'process_single' + + container 
"docker.io/gallvp/juicebox_scripts:a7ae991_ps" + publishDir "${params.outdir}/hic/assembly", mode:'copy' + + input: + tuple val(sample_id_on_tag), path(agp_file) + + output: + tuple val(sample_id_on_tag), path("*.agp.assembly"), emit: assembly + + script: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + agp2assembly.py $agp_file "\${assembly_tag}.agp.assembly" + """ +} diff --git a/modules/local/assembly2bedpe.nf b/modules/local/assembly2bedpe.nf new file mode 100644 index 0000000..d0b7f6a --- /dev/null +++ b/modules/local/assembly2bedpe.nf @@ -0,0 +1,25 @@ +process ASSEMBLY2BEDPE { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.4" + publishDir "${params.outdir}/hic/bedpe", mode:'copy' + + input: + tuple val(sample_id_on_tag), path(agp_assembly_file) + + output: + tuple val(sample_id_on_tag), path("*.assembly.bedpe"), emit: bedpe + + script: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + assembly2bedpe.py $agp_assembly_file > "\${assembly_tag}.assembly.bedpe" + """ + + stub: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + touch "\${assembly_tag}.assembly.bedpe" + """ +} diff --git a/modules/local/hic2html.nf b/modules/local/hic2html.nf new file mode 100644 index 0000000..ba0df63 --- /dev/null +++ b/modules/local/hic2html.nf @@ -0,0 +1,19 @@ +process HIC2HTML { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/python3npkgs:v0.4" + publishDir "${params.outdir}/hic", mode: 'copy' + + input: + tuple val(sample_id_on_tag), path(hic_file) + + output: + path "*.html", emit: html + + script: + """ + file_name="$hic_file" + hic2html.py "$hic_file" > "\${file_name%.*}.html" + """ +} diff --git a/modules/local/hicqc.nf b/modules/local/hicqc.nf new file mode 100644 index 0000000..0f5c740 --- /dev/null +++ b/modules/local/hicqc.nf @@ -0,0 +1,37 @@ +process HICQC { + tag "$meta.id" + label 'process_single' + + 
publishDir "${params.outdir}/hic/hic_qc", mode:'copy' + container "docker.io/gallvp/hic_qc:6881c33_ps" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + script: + """ + hic_qc.py \\ + -n 10000000 \\ + -b $bam \\ + --outfile_prefix "$meta.id" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hic_qc.py: \$(hic_qc.py --version) + END_VERSIONS + """ + + stub: + """ + touch "${meta.id}.pdf" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hic_qc.py: \$(hic_qc.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/juicer_sort.nf b/modules/local/juicer_sort.nf new file mode 100644 index 0000000..c57124b --- /dev/null +++ b/modules/local/juicer_sort.nf @@ -0,0 +1,22 @@ +process JUICER_SORT { + tag "$sample_id_on_tag" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(sample_id_on_tag), path(out_links_txt) + + output: + tuple val(sample_id_on_tag), path("*sorted.links.txt"), emit: links + + script: + """ + sort --parallel=${task.cpus} \\ + -k2,2 -k6,6 \\ + $out_links_txt \\ + > out.sorted.links.txt + """ +} diff --git a/modules/local/makeagpfromfasta.nf b/modules/local/makeagpfromfasta.nf new file mode 100644 index 0000000..3c5a097 --- /dev/null +++ b/modules/local/makeagpfromfasta.nf @@ -0,0 +1,18 @@ +process MAKEAGPFROMFASTA { + tag "$sample_id_on_tag" + label 'process_single' + + container "docker.io/gallvp/juicebox_scripts:a7ae991_ps" + + input: + tuple val(sample_id_on_tag), path(assembly_fasta) + + output: + tuple val(sample_id_on_tag), path("*.agp"), emit: agp + + script: + """ + file_name="$assembly_fasta" + makeAgpFromFasta.py $assembly_fasta "\${file_name%%.*}.agp" + """ +} diff --git a/modules/local/matlock_bam2_juicer.nf 
b/modules/local/matlock_bam2_juicer.nf new file mode 100644 index 0000000..5222acd --- /dev/null +++ b/modules/local/matlock_bam2_juicer.nf @@ -0,0 +1,19 @@ +process MATLOCK_BAM2_JUICER { + tag "$sample_id_on_tag" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/matlock:20181227--h4b03ef3_3': + 'biocontainers/matlock:20181227--h4b03ef3_3' }" + + input: + tuple val(sample_id_on_tag), path(hic_bam_scaffolds) + + output: + tuple val(sample_id_on_tag), path("out.links.txt") + + script: + """ + matlock bam2 juicer $hic_bam_scaffolds out.links.txt + """ +} diff --git a/modules/local/runassemblyvisualizer.nf b/modules/local/runassemblyvisualizer.nf new file mode 100644 index 0000000..a13ed92 --- /dev/null +++ b/modules/local/runassemblyvisualizer.nf @@ -0,0 +1,31 @@ +process RUNASSEMBLYVISUALIZER { + tag "$sample_id_on_tag" + label "process_medium" + + publishDir "${params.outdir}/hic", mode:'copy' + container "docker.io/gallvp/3d-dna:63029aa" + + input: + tuple val(sample_id_on_tag), path(agp_assembly_file), path(sorted_links_txt_file) + + output: + tuple val(sample_id_on_tag), path("*.hic"), emit: hic + + script: + // -p true/false Use GNU Parallel to speed up computation (default is true). 
+ """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + file_name="${agp_assembly_file}" + + /usr/src/3d-dna/visualize/run-assembly-visualizer.sh \\ + $agp_assembly_file $sorted_links_txt_file + + mv "\${file_name%.*}.hic" "\${assembly_tag}.hic" + """ + + stub: + """ + assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g') + touch "\${assembly_tag}.hic" + """ +} diff --git a/modules/pfr/bwa/index/environment.yml b/modules/pfr/bwa/index/environment.yml new file mode 100644 index 0000000..5d3cb32 --- /dev/null +++ b/modules/pfr/bwa/index/environment.yml @@ -0,0 +1,7 @@ +name: bwa_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.17 diff --git a/modules/pfr/bwa/index/main.nf b/modules/pfr/bwa/index/main.nf new file mode 100644 index 0000000..24b5a2e --- /dev/null +++ b/modules/pfr/bwa/index/main.nf @@ -0,0 +1,53 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + """ + mkdir bwa + + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/pfr/bwa/index/meta.yml b/modules/pfr/bwa/index/meta.yml new file mode 100644 index 0000000..730628d --- /dev/null +++ b/modules/pfr/bwa/index/meta.yml @@ -0,0 +1,45 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/pfr/bwa/index/tests/main.nf.test b/modules/pfr/bwa/index/tests/main.nf.test new file mode 100644 index 0000000..2f33c0e --- /dev/null +++ b/modules/pfr/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/pfr/bwa/index/tests/main.nf.test.snap b/modules/pfr/bwa/index/tests/main.nf.test.snap new file mode 100644 index 0000000..e51ad5b --- /dev/null +++ b/modules/pfr/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + 
"versions": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ] + } + ], + "timestamp": "2023-10-17T17:20:20.180927714" + } +} \ No newline at end of file diff --git a/modules/pfr/bwa/index/tests/tags.yml b/modules/pfr/bwa/index/tests/tags.yml new file mode 100644 index 0000000..28bb483 --- /dev/null +++ b/modules/pfr/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/pfr/bwa/mem/environment.yml b/modules/pfr/bwa/mem/environment.yml new file mode 100644 index 0000000..3f136d0 --- /dev/null +++ b/modules/pfr/bwa/mem/environment.yml @@ -0,0 +1,10 @@ +name: bwa_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa=0.7.17 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/pfr/bwa/mem/main.nf b/modules/pfr/bwa/mem/main.nf new file mode 100644 index 0000000..54ec0f1 --- /dev/null +++ b/modules/pfr/bwa/mem/main.nf @@ -0,0 +1,55 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/pfr/bwa/mem/meta.yml b/modules/pfr/bwa/mem/meta.yml new file mode 100644 index 0000000..440fb1f --- /dev/null +++ b/modules/pfr/bwa/mem/meta.yml @@ -0,0 +1,58 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" +maintainers: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/pfr/bwa/mem/tests/main.nf.test b/modules/pfr/bwa/mem/tests/main.nf.test new file mode 100644 index 0000000..cd6591f --- /dev/null +++ b/modules/pfr/bwa/mem/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + tag "bwa/index" + script "../main.nf" + process "BWA_MEM" + + test("Single-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Single-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/pfr/bwa/mem/tests/main.nf.test.snap b/modules/pfr/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 0000000..e4fd8cc --- /dev/null +++ b/modules/pfr/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "Single-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:48.440661587" + }, + "Single-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:56.086493265" + }, + "Paired-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,aea123a3828a99da1906126355f15a12" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,aea123a3828a99da1906126355f15a12" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:03.474974773" + }, + "Paired-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:10.721510817" + } +} \ No newline at end of file diff --git a/modules/pfr/bwa/mem/tests/tags.yml b/modules/pfr/bwa/mem/tests/tags.yml new file mode 100644 index 0000000..82992d1 --- /dev/null +++ b/modules/pfr/bwa/mem/tests/tags.yml @@ -0,0 +1,3 @@ +bwa/mem: + - modules/nf-core/bwa/index/** + - modules/nf-core/bwa/mem/** diff --git a/modules/pfr/samblaster/environment.yml b/modules/pfr/samblaster/environment.yml new file mode 100644 index 0000000..a6e3038 --- /dev/null +++ b/modules/pfr/samblaster/environment.yml @@ -0,0 +1,9 @@ +name: samblaster +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samblaster=0.1.26 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/pfr/samblaster/main.nf b/modules/pfr/samblaster/main.nf new file mode 100644 index 0000000..c9e89af --- /dev/null +++ b/modules/pfr/samblaster/main.nf @@ -0,0 +1,50 @@ +process SAMBLASTER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' : + 'biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools view -h $args2 $bam | \\ + samblaster $args | \\ + samtools view $args3 -Sb - >${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' ) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch "${prefix}.bam" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' ) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/pfr/samblaster/meta.yml b/modules/pfr/samblaster/meta.yml new file mode 100644 index 0000000..ccb4832 --- /dev/null +++ b/modules/pfr/samblaster/meta.yml @@ -0,0 +1,53 @@ +name: samblaster +description: | + This module combines samtools and samblaster in order to use + samblaster capability to filter or tag SAM files, with the advantage + of maintaining both input and output in BAM format. + Samblaster input must contain a sequence header: for this reason it has been piped + with the "samtools view -h" command. + Additional desired arguments for samtools can be passed using: + options.args2 for the input bam file + options.args3 for the output bam file +keywords: + - sort + - duplicate marking + - bam +tools: + - samblaster: + description: | + samblaster is a fast and flexible program for marking duplicates in read-id grouped paired-end SAM files. + It can also optionally output discordant read pairs and/or split read mappings to separate SAM files, + and/or unmapped/clipped reads to a separate FASTQ file. + By default, samblaster reads SAM input from stdin and writes SAM to stdout. 
+ documentation: https://github.com/GregoryFaust/samblaster + tool_dev_url: https://github.com/GregoryFaust/samblaster + doi: "10.1093/bioinformatics/btu314" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.bam" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Tagged or filtered BAM file + pattern: "*.bam" +authors: + - "@lescai" +maintainers: + - "@lescai" diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf new file mode 100644 index 0000000..2e26cc2 --- /dev/null +++ b/subworkflows/local/fq2hic.nf @@ -0,0 +1,79 @@ +include { FASTQ_TRIM_FASTP_FASTQC } from '../nf-core/fastq_trim_fastp_fastqc/main' +include { FASTQ_BWA_MEM_SAMBLASTER } from '../pfr/fastq_bwa_mem_samblaster/main' +include { HICQC } from '../../modules/local/hicqc' +include { MAKEAGPFROMFASTA } from '../../modules/local/makeagpfromfasta' +include { AGP2ASSEMBLY } from '../../modules/local/agp2assembly' +include { ASSEMBLY2BEDPE } from '../../modules/local/assembly2bedpe' +include { MATLOCK_BAM2_JUICER } from '../../modules/local/matlock_bam2_juicer' +include { JUICER_SORT } from '../../modules/local/juicer_sort' +include { RUNASSEMBLYVISUALIZER } from '../../modules/local/runassemblyvisualizer' +include { HIC2HTML } from '../../modules/local/hic2html' + +workflow FQ2HIC { + take: + reads // [ val(meta), [ fq ] ] + ref // [ val(meta2), fa ] + hic_skip_fastp // val: true|false + hic_skip_fastqc // val: true|false + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC + FASTQ_TRIM_FASTP_FASTQC( + reads, + [], + true, // val_save_trimmed_fail + false, // val_save_merged + 
hic_skip_fastp, + hic_skip_fastqc + ) + + ch_trim_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads + ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions) + + // SUBWORKFLOW: FASTQ_BWA_MEM_SAMBLASTER + FASTQ_BWA_MEM_SAMBLASTER( + ch_trim_reads, + ref.map { meta2, fa -> [ meta2, fa, [] ] } + ) + + ch_bam = FASTQ_BWA_MEM_SAMBLASTER.out.bam + ch_versions = ch_versions.mix(FASTQ_BWA_MEM_SAMBLASTER.out.versions) + + // MODULE: HICQC + ch_bam_and_ref = ch_bam + | map { meta, bam -> [ meta.ref_id, meta, bam ] } + | join( + ref.map { meta2, fa -> [ meta2.id, fa ] } + ) + | map { ref_id, meta, bam, fa -> + [ [ id: "${meta.id}.on.${meta.ref_id}" ], bam, fa ] + } + + HICQC ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3, bam ] } ) + + ch_versions = ch_versions.mix(HICQC.out.versions) + + // MODULE: MAKEAGPFROMFASTA | AGP2ASSEMBLY | ASSEMBLY2BEDPE + MAKEAGPFROMFASTA ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, fa ] } ) + | AGP2ASSEMBLY + | ASSEMBLY2BEDPE + + // MODULE: MATLOCK_BAM2_JUICER | JUICER_SORT + MATLOCK_BAM2_JUICER ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, bam ] } ) + | JUICER_SORT + + // MODULE: RUNASSEMBLYVISUALIZER + RUNASSEMBLYVISUALIZER ( AGP2ASSEMBLY.out.assembly.join(JUICER_SORT.out.links) ) + + ch_hic = RUNASSEMBLYVISUALIZER.out.hic + + // MODULE: HIC2HTML + HIC2HTML ( ch_hic ) + + emit: + hic = ch_hic + html = HIC2HTML.out.html + versions = ch_versions +} diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf new file mode 100644 index 0000000..50e824a --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf @@ -0,0 +1,61 @@ +include { BWA_INDEX } from '../../../modules/pfr/bwa/index/main' +include { BWA_MEM } from '../../../modules/pfr/bwa/mem/main' +include { SAMBLASTER } from '../../../modules/pfr/samblaster/main' + +workflow FASTQ_BWA_MEM_SAMBLASTER { + + take: + ch_fastq // channel: [ val(meta), [ fq ] ] + ch_reference // channel: [ val(meta2), fasta, 
index ]; fasta | index + + main: + ch_versions = Channel.empty() + + ch_has_index = ch_reference + | branch { meta2, fasta, index -> + yes: index + no: !index + } + + // MODULE: BWA_INDEX + BWA_INDEX ( ch_has_index.no.map { meta2, fasta, index -> [ meta2, fasta ] } ) + + ch_bwa_index = BWA_INDEX.out.index + | mix( + ch_has_index.yes + | map { meta2, fasta, index -> + [ meta2, index ] + } + ) + + ch_versions = ch_versions.mix(BWA_INDEX.out.versions.first()) + + // MODULE: BWA_MEM + ch_mem_inputs = ch_fastq + | combine( + ch_bwa_index + ) + | map { meta, fq, meta2, index -> + [ meta + [ ref_id: meta2.id ], fq, index ] + } + + def sort_bam = false + BWA_MEM( + ch_mem_inputs.map { meta, fq, index -> [ meta, fq ] }, + ch_mem_inputs.map { meta, fq, index -> [ [], index ] }, + sort_bam + ) + + ch_mem_bam = BWA_MEM.out.bam + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // MODULE: SAMBLASTER + SAMBLASTER ( ch_mem_bam ) + + ch_blasted_bam = SAMBLASTER.out.bam + ch_versions = ch_versions.mix(SAMBLASTER.out.versions.first()) + + emit: + bam = SAMBLASTER.out.bam // channel: [ val(meta), bam ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml b/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml new file mode 100644 index 0000000..6eaf302 --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_bwa_mem_samblaster" +description: Index fasta if needed, map reads with BWA MEM and filter with samblaster +keywords: + - sort + - bam + - duplicate marking +components: + - bwa/index + - bwa/mem + - samblaster +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input genome fasta file + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false, ref_id:'genome' ] + - bam: + type: file + description: Tagged or filtered BAM file + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test new file mode 100644 index 0000000..1e279e4 --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test @@ -0,0 +1,44 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_BWA_MEM_SAMBLASTER" + script "../main.nf" + workflow "FASTQ_BWA_MEM_SAMBLASTER" + config './nextflow.config' + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_bwa_mem_samblaster" + tag "samblaster" + tag "bwa/index" + tag "bwa/mem" + + + test("sarscov2-fq-gz") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ [ id: 'genome' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), [] ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} 
+ ) + } + } +} diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap new file mode 100644 index 0000000..e5a9b18 --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "sarscov2-fq-gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "ref_id": "genome" + }, + "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b" + ] + ], + "1": [ + "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7", + "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255", + "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b" + ], + "bam": [ + [ + { + "id": "test", + "ref_id": "genome" + }, + "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b" + ] + ], + "versions": [ + "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7", + "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255", + "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T19:37:23.464854" + } +} \ No newline at end of file diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config new file mode 100644 index 0000000..23df52b --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: BWA_MEM { + ext.prefix = { "${meta.id}.on.${meta.ref_id}.bwa.mem" } + ext.args = '-5SP' + } + + withName: SAMBLASTER { + ext.prefix = { "${meta.id}.on.${meta.ref_id}.samblaster" } + ext.args3 = '-h -F 2316' + } +} diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml new file mode 100644 index 0000000..810ecdd --- /dev/null +++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_bwa_mem_samblaster: + - 
subworkflows/pfr/fastq_bwa_mem_samblaster/** diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index 48e3c7d..9f521a0 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -35,6 +35,7 @@ include { ASSEMBLATHON_STATS } from '../modules/local/assemblatho include { FASTA_BUSCO_PLOT } from '../subworkflows/local/fasta_busco_plot' include { FASTA_LTRRETRIEVER_LAI } from '../subworkflows/pfr/fasta_ltrretriever_lai/main' include { FASTA_KRAKEN2 } from '../subworkflows/local/fasta_kraken2' +include { FQ2HIC } from '../subworkflows/local/fq2hic' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -50,7 +51,6 @@ include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/ma include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main' include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main' -include { FASTQ_TRIM_FASTP_FASTQC } from '../subworkflows/nf-core/fastq_trim_fastp_fastqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -76,7 +76,7 @@ workflow ASSEMBLYQC { } | branch { meta, fasta -> gz: "$fasta".endsWith(".gz") - rest: !"$fasta".endsWith(".gz") + rest: ! "$fasta".endsWith(".gz") } ch_assemby_gff3_branch = ch_input @@ -87,7 +87,7 @@ workflow ASSEMBLYQC { } | branch { meta, gff -> gz: "$gff".endsWith(".gz") - rest: !"$gff".endsWith(".gz") + rest: ! "$gff".endsWith(".gz") } ch_mono_ids = ch_input @@ -97,7 +97,7 @@ workflow ASSEMBLYQC { : null } - ch_hic_reads = !params.hic + ch_hic_reads = ! params.hic ? Channel.empty() : ( "$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/) @@ -176,7 +176,7 @@ workflow ASSEMBLYQC { | map { tag, report -> def is_clean = file(report).readLines().size < 2 - if (!is_clean) { + if (! is_clean) { log.warn(""" Adaptor contamination detected in ${tag}. 
See the report for further details. @@ -225,7 +225,7 @@ workflow ASSEMBLYQC { | map { tag, report -> def is_clean = file(report).readLines().size < 3 - if (!is_clean) { + if (! is_clean) { log.warn(""" Foreign organism contamination detected in ${tag}. See the report for further details. @@ -364,19 +364,21 @@ workflow ASSEMBLYQC { ch_kraken2_plot = FASTA_KRAKEN2.out.plot ch_versions = ch_versions.mix(FASTA_KRAKEN2.out.versions) - // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC + // SUBWORKFLOW: FQ2HIC + ch_hic_input_assembly = ! params.hic + ? Channel.empty() + : ch_clean_assembly + | map { tag, fa -> [ [ id: tag ], fa ] } - FASTQ_TRIM_FASTP_FASTQC( + FQ2HIC( ch_hic_reads, - [], - true, // val_save_trimmed_fail - false, // val_save_merged + ch_hic_input_assembly, params.hic_skip_fastp, params.hic_skip_fastqc ) - ch_cleaned_paired_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads - ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions) + ch_hic_html = FQ2HIC.out.html + ch_versions = ch_versions.mix(FQ2HIC.out.versions) // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS CUSTOM_DUMPSOFTWAREVERSIONS (