diff --git a/bin/assembly2bedpe.py b/bin/assembly2bedpe.py
new file mode 100755
index 0000000..94e4968
--- /dev/null
+++ b/bin/assembly2bedpe.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+"""Convert the sequence records of an ``.assembly`` file into a BEDPE table on stdout."""
+
+import sys
+import pandas as pd
+
+
+def read_assembly_file_cols(assembly_file_name):
+    """Read the sequence records of an ``.assembly`` file into a DataFrame.
+
+    A record is a line of exactly three space-separated fields whose first
+    field starts with ``>``. The ``>`` check matters: any other line made of
+    three fields (for example an ordering line listing three signed sequence
+    numbers) would otherwise be loaded as a bogus record.
+
+    Args:
+        assembly_file_name: Path of the ``.assembly`` file to read.
+
+    Returns:
+        DataFrame with the columns ``name`` (str, still carrying the leading
+        ``>``), ``number`` (int) and ``length`` (int). It has these columns
+        and no rows when the file holds no record line.
+
+    Raises:
+        ValueError: If the second or third field of a record is not an integer.
+    """
+    records = []
+    with open(assembly_file_name, "r") as file:
+        for line in file:
+            items = line.replace("\n", "").split(" ")
+            if len(items) == 3 and items[0].startswith(">"):
+                records.append([items[0], int(items[1]), int(items[2])])
+
+    # Naming the columns at construction (rather than assigning ``df.columns``
+    # afterwards) keeps a file without records from raising a length mismatch.
+    df = pd.DataFrame(records, columns=["name", "number", "length"])
+
+    return df.astype({"number": "int64", "length": "int64"})
+
+
+def make_bedpe_cols(assembly_file_pd):
+    """Add cumulative coordinate columns to the record table, in place.
+
+    Adds ``cum_length`` (running sum of ``length``), ``end_index``
+    (``cum_length - 1``) and ``start_index`` (the previous row's
+    ``end_index + 1``; 0 for the first row).
+
+    Args:
+        assembly_file_pd: DataFrame with a numeric ``length`` column.
+
+    Returns:
+        The same DataFrame object, with the three columns added.
+    """
+    # Called ``df`` so that the module-level pandas alias ``pd`` is not shadowed.
+    df = assembly_file_pd
+    df["cum_length"] = df["length"].cumsum()
+    df["end_index"] = df["cum_length"] - 1
+    df["start_index"] = df["end_index"].shift(periods=1, fill_value=-1) + 1
+
+    return df
+
+
+def print_bed_pe_file(bed_pe_df):
+    """Print the table as tab-separated BEDPE lines to stdout, header first.
+
+    Both anchors of a line carry the row's own ``start_index``/``end_index``
+    on the sequence ``assembly``; every ``>`` is removed from the name.
+
+    Args:
+        bed_pe_df: DataFrame with ``name``, ``start_index`` and ``end_index``.
+    """
+    print("chr1\tx1\tx2\tchr2\ty1\ty2\tname\tscore\tstrand1\tstrand2\tcolor")
+    for name, start, end in zip(
+        bed_pe_df["name"], bed_pe_df["start_index"], bed_pe_df["end_index"]
+    ):
+        print(
+            f"assembly\t{start}\t{end}\tassembly\t{start}\t{end}\t{name.replace('>', '')}\t.\t.\t.\t0,0,255"
+        )
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit(f"Usage: {sys.argv[0]} <assembly_file>")
+
+    assembly_file_cols = read_assembly_file_cols(sys.argv[1])
+    print_bed_pe_file(make_bedpe_cols(assembly_file_cols))
diff --git a/bin/hic2html.py b/bin/hic2html.py
new file mode 100755
index 0000000..eacc08f
--- /dev/null
+++ b/bin/hic2html.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+"""Fill the HiC HTML template for one ``.hic`` file and print the page to stdout."""
+
+import sys
+from pathlib import Path
+import os
+
+
+def bedpe_file_name(hic_file_name):
+    """Return the name of the BEDPE file that accompanies *hic_file_name*.
+
+    Only a trailing ``.hic`` is dropped before ``.assembly.bedpe`` is appended;
+    a blanket ``str.replace`` would also delete ``.hic`` found elsewhere in the
+    name and so yield a different file name.
+    """
+    suffix = ".hic"
+    if hic_file_name.endswith(suffix):
+        hic_file_name = hic_file_name[: -len(suffix)]
+
+    return f"{hic_file_name}.assembly.bedpe"
+
+
+def fill_template(template, hic_file_name):
+    """Return *template* with both file-name placeholders substituted.
+
+    ``HIC_FILE_NAME`` becomes *hic_file_name* and ``BEDPE_FILE_NAME`` becomes
+    the matching BEDPE file name.
+    """
+    return template.replace("HIC_FILE_NAME", hic_file_name).replace(
+        "BEDPE_FILE_NAME",
+        bedpe_file_name(hic_file_name),
+    )
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit(f"Usage: {sys.argv[0]} <hic_file>")
+
+    hic_file_name = os.path.basename(sys.argv[1])
+
+    # The template lives next to this script. ``Path.parent`` is "." when the
+    # script is started by bare file name; joining the "/"-split pieces gave ""
+    # in that case, which made the template path start at the filesystem root.
+    projectDir = Path(__file__).parent
+    html_template_path = (
+        projectDir / "report_modules" / "templates" / "hic" / "hic_html_template.html"
+    )
+
+    with open(html_template_path) as f:
+        html_file_lines = f.read()
+
+    print(fill_template(html_file_lines, hic_file_name))
diff --git a/bin/report_modules/templates/hic/hic_html_template.html b/bin/report_modules/templates/hic/hic_html_template.html
new file mode 100644
index 0000000..6d3f408
--- /dev/null
+++ b/bin/report_modules/templates/hic/hic_html_template.html
@@ -0,0 +1,92 @@
+
+
+
+
+ HiC Contact Map
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/conf/base.config b/conf/base.config
index e683193..9f5c47e 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -61,6 +61,12 @@ process {
withName:NCBI_FCS_GX_SCREEN_SAMPLES {
memory = { check_max( 512.GB * task.attempt, 'memory' ) }
}
+ withName:BWA_MEM {
+ time = { check_max( 2.day * task.attempt, 'time' ) }
+ }
+ withName:SAMBLASTER {
+ time = { check_max( 20.h * task.attempt, 'time' ) }
+ }
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
diff --git a/modules.json b/modules.json
index d68ea0e..a57849e 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,16 @@
"git@github.com:PlantandFoodResearch/nxf-modules.git": {
"modules": {
"pfr": {
+ "bwa/index": {
+ "branch": "main",
+ "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
+ "installed_by": ["fastq_bwa_mem_samblaster"]
+ },
+ "bwa/mem": {
+ "branch": "main",
+ "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
+ "installed_by": ["fastq_bwa_mem_samblaster"]
+ },
"cat/cat": {
"branch": "main",
"git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
@@ -59,6 +69,11 @@
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fasta_ltrretriever_lai"]
+ },
+ "samblaster": {
+ "branch": "main",
+ "git_sha": "73358a6712178b9a67c39f92e65e8144b5880eae",
+ "installed_by": ["fastq_bwa_mem_samblaster"]
}
}
},
@@ -69,6 +84,11 @@
"git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
"installed_by": ["subworkflows"]
},
+ "fastq_bwa_mem_samblaster": {
+ "branch": "main",
+ "git_sha": "9639ac9a556898d0f0e8592bff24585c33326458",
+ "installed_by": ["subworkflows"]
+ },
"gff3_validate": {
"branch": "main",
"git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9",
diff --git a/modules/local/agp2assembly.nf b/modules/local/agp2assembly.nf
new file mode 100644
index 0000000..edb60d9
--- /dev/null
+++ b/modules/local/agp2assembly.nf
@@ -0,0 +1,19 @@
+process AGP2ASSEMBLY { // Runs agp2assembly.py on an AGP file, writing "<assembly_tag>.agp.assembly"
+ tag "$sample_id_on_tag"
+ label 'process_single'
+ // The emitted file is copied to <params.outdir>/hic/assembly
+ container "docker.io/gallvp/juicebox_scripts:a7ae991_ps"
+ publishDir "${params.outdir}/hic/assembly", mode:'copy'
+ // sample_id_on_tag is a plain value: it tags the task and is the source of the output file name
+ input:
+ tuple val(sample_id_on_tag), path(agp_file)
+
+ output:
+ tuple val(sample_id_on_tag), path("*.agp.assembly"), emit: assembly
+ // assembly_tag = sample_id_on_tag with everything up to and including its last ".on." removed (unchanged if absent)
+ script:
+ """
+ assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
+ agp2assembly.py $agp_file "\${assembly_tag}.agp.assembly"
+ """
+}
diff --git a/modules/local/assembly2bedpe.nf b/modules/local/assembly2bedpe.nf
new file mode 100644
index 0000000..d0b7f6a
--- /dev/null
+++ b/modules/local/assembly2bedpe.nf
@@ -0,0 +1,25 @@
+process ASSEMBLY2BEDPE { // Redirects the stdout of assembly2bedpe.py into "<assembly_tag>.assembly.bedpe"
+ tag "$sample_id_on_tag"
+ label 'process_single'
+ // The emitted file is copied to <params.outdir>/hic/bedpe
+ container "docker.io/gallvp/python3npkgs:v0.4"
+ publishDir "${params.outdir}/hic/bedpe", mode:'copy'
+
+ input:
+ tuple val(sample_id_on_tag), path(agp_assembly_file)
+
+ output:
+ tuple val(sample_id_on_tag), path("*.assembly.bedpe"), emit: bedpe
+ // assembly_tag = sample_id_on_tag with everything up to and including its last ".on." removed (unchanged if absent)
+ script:
+ """
+ assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
+ assembly2bedpe.py $agp_assembly_file > "\${assembly_tag}.assembly.bedpe"
+ """
+ // Stub: creates an empty file under the same name the real script would produce
+ stub:
+ """
+ assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
+ touch "\${assembly_tag}.assembly.bedpe"
+ """
+}
diff --git a/modules/local/hic2html.nf b/modules/local/hic2html.nf
new file mode 100644
index 0000000..ba0df63
--- /dev/null
+++ b/modules/local/hic2html.nf
@@ -0,0 +1,19 @@
+process HIC2HTML { // Redirects the stdout of hic2html.py into an HTML file named after the .hic file
+ tag "$sample_id_on_tag"
+ label 'process_single'
+ // The emitted page is copied to <params.outdir>/hic
+ container "docker.io/gallvp/python3npkgs:v0.4"
+ publishDir "${params.outdir}/hic", mode: 'copy'
+
+ input:
+ tuple val(sample_id_on_tag), path(hic_file)
+ // Only the HTML path is emitted; sample_id_on_tag is not carried to the output
+ output:
+ path "*.html", emit: html
+ // "\${file_name%.*}" drops the last extension of the input name before ".html" is appended
+ script:
+ """
+ file_name="$hic_file"
+ hic2html.py "$hic_file" > "\${file_name%.*}.html"
+ """
+}
diff --git a/modules/local/hicqc.nf b/modules/local/hicqc.nf
new file mode 100644
index 0000000..0f5c740
--- /dev/null
+++ b/modules/local/hicqc.nf
@@ -0,0 +1,37 @@
+process HICQC { // Runs hic_qc.py on a BAM file and emits the PDF(s) it writes plus versions.yml
+ tag "$meta.id"
+ label 'process_single'
+ // Outputs are copied to <params.outdir>/hic/hic_qc
+ publishDir "${params.outdir}/hic/hic_qc", mode:'copy'
+ container "docker.io/gallvp/hic_qc:6881c33_ps"
+
+ input:
+ tuple val(meta), path(bam)
+
+ output:
+ tuple val(meta), path("*.pdf") , emit: pdf
+ path "versions.yml" , emit: versions
+ // meta.id is the output prefix; NOTE(review): -n 10000000 is presumably a read-count limit - confirm in the hic_qc.py help
+ script:
+ """
+ hic_qc.py \\
+ -n 10000000 \\
+ -b $bam \\
+ --outfile_prefix "$meta.id"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ hic_qc.py: \$(hic_qc.py --version)
+ END_VERSIONS
+ """
+ // Stub: creates an empty "<meta.id>.pdf"; it still calls hic_qc.py --version for versions.yml
+ stub:
+ """
+ touch "${meta.id}.pdf"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ hic_qc.py: \$(hic_qc.py --version)
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/juicer_sort.nf b/modules/local/juicer_sort.nf
new file mode 100644
index 0000000..c57124b
--- /dev/null
+++ b/modules/local/juicer_sort.nf
@@ -0,0 +1,22 @@
+process JUICER_SORT { // Sorts a links text file with GNU sort into out.sorted.links.txt
+ tag "$sample_id_on_tag"
+ label 'process_high'
+ // Singularity pulls the Galaxy depot image unless task.ext.singularity_pull_docker_container is set
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ tuple val(sample_id_on_tag), path(out_links_txt)
+
+ output:
+ tuple val(sample_id_on_tag), path("*sorted.links.txt"), emit: links
+ // Sort keys are field 2, then field 6; --parallel is set to the CPUs allotted to the task
+ script:
+ """
+ sort --parallel=${task.cpus} \\
+ -k2,2 -k6,6 \\
+ $out_links_txt \\
+ > out.sorted.links.txt
+ """
+}
diff --git a/modules/local/makeagpfromfasta.nf b/modules/local/makeagpfromfasta.nf
new file mode 100644
index 0000000..3c5a097
--- /dev/null
+++ b/modules/local/makeagpfromfasta.nf
@@ -0,0 +1,18 @@
+process MAKEAGPFROMFASTA { // Runs makeAgpFromFasta.py on an assembly fasta to write an .agp file
+ tag "$sample_id_on_tag"
+ label 'process_single'
+ // No publishDir: the .agp file is only passed on through the 'agp' output
+ container "docker.io/gallvp/juicebox_scripts:a7ae991_ps"
+
+ input:
+ tuple val(sample_id_on_tag), path(assembly_fasta)
+
+ output:
+ tuple val(sample_id_on_tag), path("*.agp"), emit: agp
+ // "\${file_name%%.*}" cuts the fasta name at its FIRST dot, so "a.b.fasta" yields "a.agp"
+ script:
+ """
+ file_name="$assembly_fasta"
+ makeAgpFromFasta.py $assembly_fasta "\${file_name%%.*}.agp"
+ """
+}
diff --git a/modules/local/matlock_bam2_juicer.nf b/modules/local/matlock_bam2_juicer.nf
new file mode 100644
index 0000000..5222acd
--- /dev/null
+++ b/modules/local/matlock_bam2_juicer.nf
@@ -0,0 +1,19 @@
+process MATLOCK_BAM2_JUICER { // Runs "matlock bam2 juicer" on a BAM file to write out.links.txt
+ tag "$sample_id_on_tag"
+ label 'process_single'
+ // Singularity pulls the Galaxy depot image unless task.ext.singularity_pull_docker_container is set
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/matlock:20181227--h4b03ef3_3':
+ 'biocontainers/matlock:20181227--h4b03ef3_3' }"
+
+ input:
+ tuple val(sample_id_on_tag), path(hic_bam_scaffolds)
+ // The single output has no emit name, unlike the other processes of this subworkflow
+ output:
+ tuple val(sample_id_on_tag), path("out.links.txt")
+
+ script:
+ """
+ matlock bam2 juicer $hic_bam_scaffolds out.links.txt
+ """
+}
diff --git a/modules/local/runassemblyvisualizer.nf b/modules/local/runassemblyvisualizer.nf
new file mode 100644
index 0000000..a13ed92
--- /dev/null
+++ b/modules/local/runassemblyvisualizer.nf
@@ -0,0 +1,31 @@
+process RUNASSEMBLYVISUALIZER { // Runs 3d-dna's run-assembly-visualizer.sh and emits the result as "<assembly_tag>.hic"
+ tag "$sample_id_on_tag"
+ label "process_medium"
+ // The .hic file is copied to <params.outdir>/hic
+ publishDir "${params.outdir}/hic", mode:'copy'
+ container "docker.io/gallvp/3d-dna:63029aa"
+
+ input:
+ tuple val(sample_id_on_tag), path(agp_assembly_file), path(sorted_links_txt_file)
+
+ output:
+ tuple val(sample_id_on_tag), path("*.hic"), emit: hic
+ // The mv expects the visualizer to have written "<assembly file minus its last extension>.hic" and renames it
+ script:
+ // -p true/false Use GNU Parallel to speed up computation (default is true).
+ """
+ assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
+ file_name="${agp_assembly_file}"
+
+ /usr/src/3d-dna/visualize/run-assembly-visualizer.sh \\
+ $agp_assembly_file $sorted_links_txt_file
+
+ mv "\${file_name%.*}.hic" "\${assembly_tag}.hic"
+ """
+ // Stub: creates an empty "<assembly_tag>.hic" without running the visualizer
+ stub:
+ """
+ assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
+ touch "\${assembly_tag}.hic"
+ """
+}
diff --git a/modules/pfr/bwa/index/environment.yml b/modules/pfr/bwa/index/environment.yml
new file mode 100644
index 0000000..5d3cb32
--- /dev/null
+++ b/modules/pfr/bwa/index/environment.yml
@@ -0,0 +1,7 @@
+name: bwa_index
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::bwa=0.7.17
diff --git a/modules/pfr/bwa/index/main.nf b/modules/pfr/bwa/index/main.nf
new file mode 100644
index 0000000..24b5a2e
--- /dev/null
+++ b/modules/pfr/bwa/index/main.nf
@@ -0,0 +1,53 @@
+process BWA_INDEX { // Builds a BWA index for a fasta file inside a directory named "bwa"
+ tag "$fasta"
+ label 'process_single'
+ // Conda uses the environment.yml next to this module; containers ship bwa 0.7.17
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' :
+ 'biocontainers/bwa:0.7.17--hed695b0_7' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ // The index is emitted as the whole "bwa" directory, not as individual files
+ output:
+ tuple val(meta), path(bwa) , emit: index
+ path "versions.yml" , emit: versions
+ // The task runs unless task.ext.when is set to a falsy value
+ when:
+ task.ext.when == null || task.ext.when
+ // prefix defaults to the fasta base name; task.ext.args is passed verbatim to "bwa index"
+ script:
+ def prefix = task.ext.prefix ?: "${fasta.baseName}"
+ def args = task.ext.args ?: ''
+ """
+ mkdir bwa
+ bwa \\
+ index \\
+ $args \\
+ -p bwa/${prefix} \\
+ $fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+ END_VERSIONS
+ """
+ // Stub: touches the five index files (.amb .ann .bwt .pac .sa) instead of running bwa index
+ stub:
+ def prefix = task.ext.prefix ?: "${fasta.baseName}"
+ """
+ mkdir bwa
+
+ touch bwa/${prefix}.amb
+ touch bwa/${prefix}.ann
+ touch bwa/${prefix}.bwt
+ touch bwa/${prefix}.pac
+ touch bwa/${prefix}.sa
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/bwa/index/meta.yml b/modules/pfr/bwa/index/meta.yml
new file mode 100644
index 0000000..730628d
--- /dev/null
+++ b/modules/pfr/bwa/index/meta.yml
@@ -0,0 +1,45 @@
+name: bwa_index
+description: Create BWA index for reference genome
+keywords:
+ - index
+ - fasta
+ - genome
+ - reference
+tools:
+ - bwa:
+ description: |
+ BWA is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: http://bio-bwa.sourceforge.net/
+ documentation: https://bio-bwa.sourceforge.net/bwa.shtml
+ arxiv: arXiv:1303.3997
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Input genome fasta file
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - index:
+ type: file
+ description: BWA genome index files
+ pattern: "*.{amb,ann,bwt,pac,sa}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@maxulysse"
+maintainers:
+ - "@drpatelh"
+ - "@maxulysse"
diff --git a/modules/pfr/bwa/index/tests/main.nf.test b/modules/pfr/bwa/index/tests/main.nf.test
new file mode 100644
index 0000000..2f33c0e
--- /dev/null
+++ b/modules/pfr/bwa/index/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_process {
+ // nf-test suite for the BWA_INDEX process defined in ../main.nf
+ name "Test Process BWA_INDEX"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "bwa"
+ tag "bwa/index"
+ script "../main.nf"
+ process "BWA_INDEX"
+ // Indexes the sarscov2 genome fasta taken from params.test_data
+ test("BWA index") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ // The process must succeed and all of its outputs must match the stored snapshot
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/bwa/index/tests/main.nf.test.snap b/modules/pfr/bwa/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..e51ad5b
--- /dev/null
+++ b/modules/pfr/bwa/index/tests/main.nf.test.snap
@@ -0,0 +1,43 @@
+{
+ "BWA index": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
+ "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
+ "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
+ "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
+ "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
+ ],
+ "index": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
+ "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
+ "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
+ "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
+ "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T17:20:20.180927714"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/bwa/index/tests/tags.yml b/modules/pfr/bwa/index/tests/tags.yml
new file mode 100644
index 0000000..28bb483
--- /dev/null
+++ b/modules/pfr/bwa/index/tests/tags.yml
@@ -0,0 +1,2 @@
+bwa/index:
+ - modules/nf-core/bwa/index/**
diff --git a/modules/pfr/bwa/mem/environment.yml b/modules/pfr/bwa/mem/environment.yml
new file mode 100644
index 0000000..3f136d0
--- /dev/null
+++ b/modules/pfr/bwa/mem/environment.yml
@@ -0,0 +1,10 @@
+name: bwa_mem
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bwa=0.7.17
+ # renovate: datasource=conda depName=bioconda/samtools
+ - samtools=1.19.2
+ - htslib=1.19.1
diff --git a/modules/pfr/bwa/mem/main.nf b/modules/pfr/bwa/mem/main.nf
new file mode 100644
index 0000000..54ec0f1
--- /dev/null
+++ b/modules/pfr/bwa/mem/main.nf
@@ -0,0 +1,55 @@
+process BWA_MEM { // Aligns reads with "bwa mem" and pipes the result through samtools into "<prefix>.bam"
+ tag "$meta.id"
+ label 'process_high'
+ // Conda uses the environment.yml next to this module; containers are a mulled bwa + samtools image
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' :
+ 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' }"
+ // meta2 is accepted with the index but is not used in the script; sort_bam picks the samtools subcommand
+ input:
+ tuple val(meta), path(reads)
+ tuple val(meta2), path(index)
+ val sort_bam
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam
+ path "versions.yml" , emit: versions
+ // The task runs unless task.ext.when is set to a falsy value
+ when:
+ task.ext.when == null || task.ext.when
+ // args go to "bwa mem", args2 to samtools; INDEX is the path of the staged *.amb file minus ".amb" (symlinks followed)
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def samtools_command = sort_bam ? 'sort' : 'view'
+ """
+ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
+
+ bwa mem \\
+ $args \\
+ -t $task.cpus \\
+ \$INDEX \\
+ $reads \\
+ | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+ // Stub: creates an empty "<prefix>.bam"; the version commands are still executed
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/bwa/mem/meta.yml b/modules/pfr/bwa/mem/meta.yml
new file mode 100644
index 0000000..440fb1f
--- /dev/null
+++ b/modules/pfr/bwa/mem/meta.yml
@@ -0,0 +1,58 @@
+name: bwa_mem
+description: Performs fastq alignment to a fasta reference using BWA
+keywords:
+ - mem
+ - bwa
+ - alignment
+ - map
+ - fastq
+ - bam
+ - sam
+tools:
+ - bwa:
+ description: |
+ BWA is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: http://bio-bwa.sourceforge.net/
+ documentation: https://bio-bwa.sourceforge.net/bwa.shtml
+ arxiv: arXiv:1303.3997
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - index:
+ type: file
+ description: BWA genome index files
+ pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
+ - sort_bam:
+ type: boolean
+ description: use samtools sort (true) or samtools view (false)
+ pattern: "true or false"
+output:
+ - bam:
+ type: file
+ description: Output BAM file containing read alignments
+ pattern: "*.{bam}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@jeremy1805"
+maintainers:
+ - "@drpatelh"
+ - "@jeremy1805"
diff --git a/modules/pfr/bwa/mem/tests/main.nf.test b/modules/pfr/bwa/mem/tests/main.nf.test
new file mode 100644
index 0000000..cd6591f
--- /dev/null
+++ b/modules/pfr/bwa/mem/tests/main.nf.test
@@ -0,0 +1,173 @@
+nextflow_process {
+
+ name "Test Process BWA_MEM"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "bwa"
+ tag "bwa/mem"
+ tag "bwa/index"
+ script "../main.nf"
+ process "BWA_MEM"
+
+ test("Single-End") {
+ // Setup: run BWA_INDEX on the sarscov2 genome so its index can be fed to the process under test
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ // When: one FastQ file, the index from setup, and input[2] (sort_bam) = false
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = false
+ """
+ }
+ }
+ // Then: the process must succeed and all of its outputs must match the stored snapshot
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("Single-End Sort") {
+ // Setup: run BWA_INDEX on the sarscov2 genome so its index can be fed to the process under test
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ // When: one FastQ file, the index from setup, and input[2] (sort_bam) = true
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = true
+ """
+ }
+ }
+ // Then: the process must succeed and all of its outputs must match the stored snapshot
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("Paired-End") {
+ // Setup: run BWA_INDEX on the sarscov2 genome so its index can be fed to the process under test
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ // When: two FastQ files, the index from setup, and input[2] (sort_bam) = false
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = false
+ """
+ }
+ }
+ // Then: the process must succeed and all of its outputs must match the stored snapshot
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("Paired-End Sort") {
+ // Setup: run BWA_INDEX on the sarscov2 genome so its index can be fed to the process under test
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ // When: two FastQ files, the index from setup, and input[2] (sort_bam) = true
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = true
+ """
+ }
+ }
+ // Then: the process must succeed and all of its outputs must match the stored snapshot
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+}
diff --git a/modules/pfr/bwa/mem/tests/main.nf.test.snap b/modules/pfr/bwa/mem/tests/main.nf.test.snap
new file mode 100644
index 0000000..e4fd8cc
--- /dev/null
+++ b/modules/pfr/bwa/mem/tests/main.nf.test.snap
@@ -0,0 +1,142 @@
+{
+ "Single-End": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,a74710a0345b4717bb4431bf9c257120"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,a74710a0345b4717bb4431bf9c257120"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-19T11:11:48.440661587"
+ },
+ "Single-End Sort": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,cb1e038bc4d990683fa485d632550b54"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,cb1e038bc4d990683fa485d632550b54"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-19T11:11:56.086493265"
+ },
+ "Paired-End": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,aea123a3828a99da1906126355f15a12"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,aea123a3828a99da1906126355f15a12"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-19T11:12:03.474974773"
+ },
+ "Paired-End Sort": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,4682087bcdc3617384b375093fecd8dd"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,4682087bcdc3617384b375093fecd8dd"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-19T11:12:10.721510817"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/bwa/mem/tests/tags.yml b/modules/pfr/bwa/mem/tests/tags.yml
new file mode 100644
index 0000000..82992d1
--- /dev/null
+++ b/modules/pfr/bwa/mem/tests/tags.yml
@@ -0,0 +1,3 @@
+bwa/mem:
+ - modules/nf-core/bwa/index/**
+ - modules/nf-core/bwa/mem/**
diff --git a/modules/pfr/samblaster/environment.yml b/modules/pfr/samblaster/environment.yml
new file mode 100644
index 0000000..a6e3038
--- /dev/null
+++ b/modules/pfr/samblaster/environment.yml
@@ -0,0 +1,9 @@
+name: samblaster
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::samblaster=0.1.26
+ - bioconda::samtools=1.19.2
+ - bioconda::htslib=1.19.1
diff --git a/modules/pfr/samblaster/main.nf b/modules/pfr/samblaster/main.nf
new file mode 100644
index 0000000..c9e89af
--- /dev/null
+++ b/modules/pfr/samblaster/main.nf
@@ -0,0 +1,50 @@
+process SAMBLASTER { // Streams a BAM through "samtools view -h | samblaster | samtools view -Sb" into "<prefix>.bam"
+ tag "$meta.id"
+ label 'process_low'
+ // Conda uses the environment.yml next to this module; containers are a mulled samblaster + samtools image
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' :
+ 'biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:60ebac4ad9c6530c0d7bf6844f52ec6916e1e0b1-0' }"
+
+ input:
+ tuple val(meta), path(bam)
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam
+ path "versions.yml" , emit: versions
+ // The task runs unless task.ext.when is set to a falsy value
+ when:
+ task.ext.when == null || task.ext.when
+ // args -> samblaster, args2 -> the reading samtools view, args3 -> the writing samtools view
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def args3 = task.ext.args3 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ samtools view -h $args2 $bam | \\
+ samblaster $args | \\
+ samtools view $args3 -Sb - >${prefix}.bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' )
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+ // Stub: same name-clash guard as the script, then an empty "<prefix>.bam"
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch "${prefix}.bam"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' )
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/samblaster/meta.yml b/modules/pfr/samblaster/meta.yml
new file mode 100644
index 0000000..ccb4832
--- /dev/null
+++ b/modules/pfr/samblaster/meta.yml
@@ -0,0 +1,53 @@
+name: samblaster
+description: |
+ This module combines samtools and samblaster in order to use
+ samblaster capability to filter or tag SAM files, with the advantage
+ of maintaining both input and output in BAM format.
+ Samblaster input must contain a sequence header: for this reason it has been piped
+ with the "samtools view -h" command.
+ Additional desired arguments for samtools can be passed using:
+ options.args2 for the input bam file
+ options.args3 for the output bam file
+keywords:
+ - sort
+ - duplicate marking
+ - bam
+tools:
+ - samblaster:
+ description: |
+ samblaster is a fast and flexible program for marking duplicates in read-id grouped paired-end SAM files.
+ It can also optionally output discordant read pairs and/or split read mappings to separate SAM files,
+ and/or unmapped/clipped reads to a separate FASTQ file.
+ By default, samblaster reads SAM input from stdin and writes SAM to stdout.
+ documentation: https://github.com/GregoryFaust/samblaster
+ tool_dev_url: https://github.com/GregoryFaust/samblaster
+ doi: "10.1093/bioinformatics/btu314"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM file
+ pattern: "*.bam"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - bam:
+ type: file
+ description: Tagged or filtered BAM file
+ pattern: "*.bam"
+authors:
+ - "@lescai"
+maintainers:
+ - "@lescai"
diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf
new file mode 100644
index 0000000..2e26cc2
--- /dev/null
+++ b/subworkflows/local/fq2hic.nf
@@ -0,0 +1,79 @@
+include { FASTQ_TRIM_FASTP_FASTQC } from '../nf-core/fastq_trim_fastp_fastqc/main'
+include { FASTQ_BWA_MEM_SAMBLASTER } from '../pfr/fastq_bwa_mem_samblaster/main'
+include { HICQC } from '../../modules/local/hicqc'
+include { MAKEAGPFROMFASTA } from '../../modules/local/makeagpfromfasta'
+include { AGP2ASSEMBLY } from '../../modules/local/agp2assembly'
+include { ASSEMBLY2BEDPE } from '../../modules/local/assembly2bedpe'
+include { MATLOCK_BAM2_JUICER } from '../../modules/local/matlock_bam2_juicer'
+include { JUICER_SORT } from '../../modules/local/juicer_sort'
+include { RUNASSEMBLYVISUALIZER } from '../../modules/local/runassemblyvisualizer'
+include { HIC2HTML } from '../../modules/local/hic2html'
+
+workflow FQ2HIC { // Turns Hi-C reads plus reference fasta(s) into .hic contact maps and HTML viewer pages
+ take:
+ reads // [ val(meta), [ fq ] ]
+ ref // [ val(meta2), fa ]
+ hic_skip_fastp // val: true|false
+ hic_skip_fastqc // val: true|false
+
+ main:
+ ch_versions = Channel.empty()
+ // Versions are mixed in from the two subworkflows and HICQC only
+ // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC
+ FASTQ_TRIM_FASTP_FASTQC(
+ reads,
+ [],
+ true, // val_save_trimmed_fail
+ false, // val_save_merged
+ hic_skip_fastp,
+ hic_skip_fastqc
+ )
+
+ ch_trim_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads
+ ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions)
+
+ // SUBWORKFLOW: FASTQ_BWA_MEM_SAMBLASTER (the [] third element means no pre-built index is supplied)
+ FASTQ_BWA_MEM_SAMBLASTER(
+ ch_trim_reads,
+ ref.map { meta2, fa -> [ meta2, fa, [] ] }
+ )
+
+ ch_bam = FASTQ_BWA_MEM_SAMBLASTER.out.bam
+ ch_versions = ch_versions.mix(FASTQ_BWA_MEM_SAMBLASTER.out.versions)
+ // Key each BAM by meta.ref_id to fetch its reference fasta; the new meta id is "<meta.id>.on.<meta.ref_id>"
+ // MODULE: HICQC
+ ch_bam_and_ref = ch_bam
+ | map { meta, bam -> [ meta.ref_id, meta, bam ] }
+ | join(
+ ref.map { meta2, fa -> [ meta2.id, fa ] }
+ )
+ | map { ref_id, meta, bam, fa ->
+ [ [ id: "${meta.id}.on.${meta.ref_id}" ], bam, fa ]
+ }
+ // NOTE(review): join matches keys one-to-one, so several BAMs sharing one ref_id may not all be paired - confirm a single read set is assumed
+ HICQC ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3, bam ] } )
+
+ ch_versions = ch_versions.mix(HICQC.out.versions)
+ // From here on the tuple key is the plain id string (meta3.id), not a meta map
+ // MODULE: MAKEAGPFROMFASTA | AGP2ASSEMBLY | ASSEMBLY2BEDPE
+ MAKEAGPFROMFASTA ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, fa ] } )
+ | AGP2ASSEMBLY
+ | ASSEMBLY2BEDPE
+
+ // MODULE: MATLOCK_BAM2_JUICER | JUICER_SORT
+ MATLOCK_BAM2_JUICER ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3.id, bam ] } )
+ | JUICER_SORT
+ // The join below pairs each .assembly file with the sorted links that carry the same id string
+ // MODULE: RUNASSEMBLYVISUALIZER
+ RUNASSEMBLYVISUALIZER ( AGP2ASSEMBLY.out.assembly.join(JUICER_SORT.out.links) )
+
+ ch_hic = RUNASSEMBLYVISUALIZER.out.hic
+
+ // MODULE: HIC2HTML
+ HIC2HTML ( ch_hic )
+
+ emit:
+ hic = ch_hic
+ html = HIC2HTML.out.html
+ versions = ch_versions
+}
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf
new file mode 100644
index 0000000..50e824a
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf
@@ -0,0 +1,61 @@
+include { BWA_INDEX } from '../../../modules/pfr/bwa/index/main'
+include { BWA_MEM } from '../../../modules/pfr/bwa/mem/main'
+include { SAMBLASTER } from '../../../modules/pfr/samblaster/main'
+
+workflow FASTQ_BWA_MEM_SAMBLASTER {
+ // Index references that arrive without an index, map every read set against every reference with BWA_MEM, then run SAMBLASTER on the BAMs
+ take:
+ ch_fastq // channel: [ val(meta), [ fq ] ]
+ ch_reference // channel: [ val(meta2), fasta, index ]; a falsy index (e.g. []) means the index is built from fasta
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_has_index = ch_reference // split references on whether an index was supplied
+ | branch { meta2, fasta, index ->
+ yes: index
+ no: !index
+ }
+
+ // MODULE: BWA_INDEX
+ BWA_INDEX ( ch_has_index.no.map { meta2, fasta, index -> [ meta2, fasta ] } ) // only references lacking an index are indexed
+
+ ch_bwa_index = BWA_INDEX.out.index // freshly built indices ...
+ | mix(
+ ch_has_index.yes // ... merged with the indices supplied by the caller
+ | map { meta2, fasta, index ->
+ [ meta2, index ]
+ }
+ )
+
+ ch_versions = ch_versions.mix(BWA_INDEX.out.versions.first()) // first() keeps a single versions item
+
+ // MODULE: BWA_MEM
+ ch_mem_inputs = ch_fastq
+ | combine( // no `by` key: every read set is paired with every index
+ ch_bwa_index
+ )
+ | map { meta, fq, meta2, index ->
+ [ meta + [ ref_id: meta2.id ], fq, index ] // record which reference the reads are mapped to
+ }
+
+ def sort_bam = false // passed as the third BWA_MEM input
+ BWA_MEM(
+ ch_mem_inputs.map { meta, fq, index -> [ meta, fq ] },
+ ch_mem_inputs.map { meta, fq, index -> [ [], index ] }, // the meta slot of the index input is left empty
+ sort_bam
+ )
+
+ ch_mem_bam = BWA_MEM.out.bam
+ ch_versions = ch_versions.mix(BWA_MEM.out.versions.first())
+
+ // MODULE: SAMBLASTER
+ SAMBLASTER ( ch_mem_bam )
+
+ ch_blasted_bam = SAMBLASTER.out.bam // NOTE(review): not referenced below; emit reads SAMBLASTER.out.bam directly
+ ch_versions = ch_versions.mix(SAMBLASTER.out.versions.first())
+
+ emit:
+ bam = SAMBLASTER.out.bam // channel: [ val(meta), bam ]; meta carries the added ref_id
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml b/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml
new file mode 100644
index 0000000..6eaf302
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_bwa_mem_samblaster"
+description: Index fasta if needed, map reads with BWA MEM and filter with samblaster
+keywords:
+ - sort
+ - bam
+ - duplicate marking
+components:
+ - bwa/index
+ - bwa/mem
+ - samblaster
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fasta:
+ type: file
+ description: Input genome fasta file
+ - index:
+ type: file
+ description: BWA genome index files
+ pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false, ref_id:'genome' ]
+ - bam:
+ type: file
+ description: Tagged or filtered BAM file
+ pattern: "*.bam"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test
new file mode 100644
index 0000000..1e279e4
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test
@@ -0,0 +1,44 @@
+nextflow_workflow {
+ // nf-test spec for the FASTQ_BWA_MEM_SAMBLASTER subworkflow
+ name "Test Subworkflow FASTQ_BWA_MEM_SAMBLASTER"
+ script "../main.nf"
+ workflow "FASTQ_BWA_MEM_SAMBLASTER"
+ config './nextflow.config' // test-local config that sets ext.prefix/ext.args for BWA_MEM and SAMBLASTER
+
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_bwa_mem_samblaster"
+ tag "samblaster"
+ tag "bwa/index"
+ tag "bwa/mem"
+
+ // Single scenario: one paired read set and one genome fasta passed with an empty index slot
+ test("sarscov2-fq-gz") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test' ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ )
+ input[1] = Channel.of(
+ [ [ id: 'genome' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), [] ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success}, // the run must finish without errors
+ { assert snapshot(workflow.out).match()} // all emitted channels are checked against the stored snapshot
+ )
+ }
+ }
+}
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap
new file mode 100644
index 0000000..e5a9b18
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+ "sarscov2-fq-gz": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "ref_id": "genome"
+ },
+ "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7",
+ "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255",
+ "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "ref_id": "genome"
+ },
+ "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7",
+ "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255",
+ "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-08T19:37:23.464854"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config
new file mode 100644
index 0000000..23df52b
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/nextflow.config
@@ -0,0 +1,11 @@
+process { // per-process settings used by the FASTQ_BWA_MEM_SAMBLASTER test
+ withName: BWA_MEM {
+ ext.prefix = { "${meta.id}.on.${meta.ref_id}.bwa.mem" } // closure evaluated per task; relies on the ref_id key the subworkflow adds to meta
+ ext.args = '-5SP' // NOTE(review): presumably forwarded to the bwa mem command line - confirm in the module script
+ }
+
+ withName: SAMBLASTER {
+ ext.prefix = { "${meta.id}.on.${meta.ref_id}.samblaster" } // yields e.g. test.on.genome.samblaster.bam in the snapshot
+ ext.args3 = '-h -F 2316' // NOTE(review): which command consumes args3 is defined in the module, not visible here - confirm
+ }
+}
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml
new file mode 100644
index 0000000..810ecdd
--- /dev/null
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_bwa_mem_samblaster:
+ - subworkflows/pfr/fastq_bwa_mem_samblaster/**
diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf
index 48e3c7d..9f521a0 100644
--- a/workflows/assemblyqc.nf
+++ b/workflows/assemblyqc.nf
@@ -35,6 +35,7 @@ include { ASSEMBLATHON_STATS } from '../modules/local/assemblatho
include { FASTA_BUSCO_PLOT } from '../subworkflows/local/fasta_busco_plot'
include { FASTA_LTRRETRIEVER_LAI } from '../subworkflows/pfr/fasta_ltrretriever_lai/main'
include { FASTA_KRAKEN2 } from '../subworkflows/local/fasta_kraken2'
+include { FQ2HIC } from '../subworkflows/local/fq2hic'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -50,7 +51,6 @@ include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/ma
include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main'
include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main'
include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main'
-include { FASTQ_TRIM_FASTP_FASTQC } from '../subworkflows/nf-core/fastq_trim_fastp_fastqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
@@ -76,7 +76,7 @@ workflow ASSEMBLYQC {
}
| branch { meta, fasta ->
gz: "$fasta".endsWith(".gz")
- rest: !"$fasta".endsWith(".gz")
+ rest: ! "$fasta".endsWith(".gz")
}
ch_assemby_gff3_branch = ch_input
@@ -87,7 +87,7 @@ workflow ASSEMBLYQC {
}
| branch { meta, gff ->
gz: "$gff".endsWith(".gz")
- rest: !"$gff".endsWith(".gz")
+ rest: ! "$gff".endsWith(".gz")
}
ch_mono_ids = ch_input
@@ -97,7 +97,7 @@ workflow ASSEMBLYQC {
: null
}
- ch_hic_reads = !params.hic
+ ch_hic_reads = ! params.hic
? Channel.empty()
: (
"$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/)
@@ -176,7 +176,7 @@ workflow ASSEMBLYQC {
| map { tag, report ->
def is_clean = file(report).readLines().size < 2
- if (!is_clean) {
+ if (! is_clean) {
log.warn("""
Adaptor contamination detected in ${tag}.
See the report for further details.
@@ -225,7 +225,7 @@ workflow ASSEMBLYQC {
| map { tag, report ->
def is_clean = file(report).readLines().size < 3
- if (!is_clean) {
+ if (! is_clean) {
log.warn("""
Foreign organism contamination detected in ${tag}.
See the report for further details.
@@ -364,19 +364,21 @@ workflow ASSEMBLYQC {
ch_kraken2_plot = FASTA_KRAKEN2.out.plot
ch_versions = ch_versions.mix(FASTA_KRAKEN2.out.versions)
- // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC
+ // SUBWORKFLOW: FQ2HIC
+ ch_hic_input_assembly = ! params.hic
+ ? Channel.empty()
+ : ch_clean_assembly
+ | map { tag, fa -> [ [ id: tag ], fa ] }
- FASTQ_TRIM_FASTP_FASTQC(
+ FQ2HIC(
ch_hic_reads,
- [],
- true, // val_save_trimmed_fail
- false, // val_save_merged
+ ch_hic_input_assembly,
params.hic_skip_fastp,
params.hic_skip_fastqc
)
- ch_cleaned_paired_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads
- ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions)
+ ch_hic_html = FQ2HIC.out.html
+ ch_versions = ch_versions.mix(FQ2HIC.out.versions)
// MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
CUSTOM_DUMPSOFTWAREVERSIONS (