Skip to content

Commit

Permalink
Added FQ2HIC subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 26, 2024
1 parent 1589033 commit f0bc5a6
Show file tree
Hide file tree
Showing 36 changed files with 1,420 additions and 13 deletions.
47 changes: 47 additions & 0 deletions bin/assembly2bedpe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python

import sys
import pandas as pd


def read_assembly_file_cols(assembly_file_name):
    """Read the three-field records of an assembly file into a DataFrame.

    Every line has its newline removed and is split on single spaces. Lines
    that do not yield exactly three fields are ignored. For the kept lines the
    first field is stored as text and the second and third fields are
    converted to integers.

    Args:
        assembly_file_name: Path of the assembly file to read.

    Returns:
        A DataFrame with the columns ``name``, ``number`` and ``length``, one
        row per kept line, in file order. When no line qualifies the frame is
        empty but still carries the three columns.

    Raises:
        ValueError: If the second or third field of a three-field line is not
            an integer literal.
    """
    records = []
    with open(assembly_file_name, "r") as file:
        # Iterate the handle directly so the file is streamed line by line.
        for line in file:
            items = line.replace("\n", "").split(" ")
            if len(items) == 3:
                records.append([items[0], int(items[1]), int(items[2])])

    # Naming the columns in the constructor (instead of assigning df.columns
    # afterwards) keeps the call valid when there are no records at all.
    return pd.DataFrame(records, columns=["name", "number", "length"])


def make_bedpe_cols(assembly_file_pd):
    """Add cumulative coordinate columns to an assembly DataFrame.

    The frame is modified in place and also returned. Three columns are
    added, in this order:

    * ``cum_length``: running total of the ``length`` column.
    * ``end_index``: ``cum_length - 1``.
    * ``start_index``: the previous row's ``end_index`` plus one, with the
      first row starting at 0.

    Args:
        assembly_file_pd: DataFrame that has a numeric ``length`` column.

    Returns:
        The same DataFrame object with the three columns added.
    """
    frame = assembly_file_pd

    running_total = frame["length"].cumsum()
    frame["cum_length"] = running_total
    frame["end_index"] = frame["cum_length"] - 1

    # Shifting by one row with a fill of -1 makes the first start equal 0.
    frame["start_index"] = frame["end_index"].shift(periods=1, fill_value=-1) + 1

    return frame


def print_bed_pe_file(bed_pe_df):
    """Print a DataFrame as tab-separated BEDPE text on standard output.

    A fixed header line is written first. Each row then produces one line in
    which both intervals use the row's ``start_index`` and ``end_index`` on a
    sequence called ``assembly``. The feature name is the row's ``name`` with
    every ``>`` character removed.

    Args:
        bed_pe_df: DataFrame with ``name``, ``start_index`` and ``end_index``
            columns; ``name`` values must be strings.
    """
    print("chr1\tx1\tx2\tchr2\ty1\ty2\tname\tscore\tstrand1\tstrand2\tcolor")

    for _, record in bed_pe_df.iterrows():
        start = record["start_index"]
        end = record["end_index"]
        label = record["name"].replace(">", "")
        print(
            f"assembly\t{start}\t{end}\tassembly\t{start}\t{end}\t{label}\t.\t.\t.\t0,0,255"
        )


if __name__ == "__main__":
    # Usage: assembly2bedpe.py <assembly file>; the BEDPE text goes to stdout.
    assembly_table = read_assembly_file_cols(sys.argv[1])
    bedpe_table = make_bedpe_cols(assembly_table)
    print_bed_pe_file(bedpe_table)
24 changes: 24 additions & 0 deletions bin/hic2html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env python

import sys
from pathlib import Path
import os


if __name__ == "__main__":
    # Usage: hic2html.py <hic file>; the filled HTML page goes to stdout.
    # Only the file name, not its directory, is written into the page.
    hic_file_name = os.path.basename(sys.argv[1])

    # The template lives next to this script. Path.parent is used instead of
    # splitting __file__ on "/" because the split yields an empty directory
    # (and therefore the absolute path "/report_modules/...") whenever
    # __file__ carries no directory component.
    projectDir = Path(__file__).parent
    html_template_path = (
        projectDir / "report_modules" / "templates" / "hic" / "hic_html_template.html"
    )

    with open(html_template_path) as f:
        html_template = f.read()

    # Drop only a trailing ".hic" extension; a plain str.replace would also
    # delete ".hic" occurring in the middle of the file name.
    if hic_file_name.endswith(".hic"):
        hic_stem = hic_file_name[: -len(".hic")]
    else:
        hic_stem = hic_file_name

    filled_template = html_template.replace("HIC_FILE_NAME", hic_file_name).replace(
        "BEDPE_FILE_NAME",
        f"{hic_stem}.assembly.bedpe",
    )

    print(filled_template)
92 changes: 92 additions & 0 deletions bin/report_modules/templates/hic/hic_html_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
<html>
<head>
<!-- Page metadata, then the stylesheets and the Juicebox script, all loaded from CDNs. -->
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>HiC Contact Map</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" />
<link
rel="stylesheet"
type="text/css"
href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/juicebox.css"
/>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/juicebox.min.js"></script>
<style>
/* Flex wrapper that centres its content on both axes; wraps #app-container in the body. */
.app-container-wrapper {
display: flex;
align-items: center;
justify-content: center;
}

/* NOTE(review): no element in this template's static markup uses this class; confirm it is still needed. */
.header-wrapper {
display: flex;
align-items: center;
justify-content: center;
}

/* NOTE(review): no element in this template's static markup uses this class; confirm it is still needed. */
.stats-wrapper {
display: flex;
align-items: center;
justify-content: center;
}

/* Applies to every <button> on the page: borderless, left-floated, with a 0.3s transition. */
button {
background-color: inherit;
float: left;
border: none;
outline: none;
cursor: pointer;
padding: 14px 16px;
transition: 0.3s;
font-size: 17px;
min-width: 90px;
margin: 8px;
}

/* Light grey background while the pointer is over a button. */
button:hover {
background-color: #ddd;
}

/* Darker grey background for a button carrying the "active" class. */
button.active {
background-color: rgb(168, 168, 168);
}

/* NOTE(review): no element in this template's static markup uses this class; confirm it is still needed. */
.scaffold-heading {
font-size: 24px;
margin: 8px;
font-weight: 500;
}
</style>
</head>

<body>
<div class="app-container-wrapper">
<div id="app-container"></div>
</div>
</body>
<script>
  // Address of the directory holding this page: the current URL with its
  // last "/"-separated segment removed.
  const pageDirectory = window.location.href.split("/").slice(0, -1).join("/");

  // The upper-case placeholder tokens in the strings below are substituted
  // with real file names by bin/hic2html.py when it fills this template.
  const scaffoldTrack = {
    name: "Scaffolds",
    url: `${pageDirectory}/bedpe/BEDPE_FILE_NAME`,
  };

  const contactMapBrowser = {
    backgroundColor: "255,255,255",
    url: `${pageDirectory}/HIC_FILE_NAME`,
    name: "HIC_FILE_NAME",
    state: "1,1,1,0,0,1,NONE",
    tracks: [scaffoldTrack],
  };

  // Start Juicebox inside #app-container; nothing further is done with the
  // browser object once initialisation resolves.
  juicebox
    .init(document.getElementById("app-container"), {
      syncDatasets: false,
      browsers: [contactMapBrowser],
    })
    .then((hicBrowser) => {});
</script>
</html>
6 changes: 6 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ process {
withName:NCBI_FCS_GX_SCREEN_SAMPLES {
memory = { check_max( 512.GB * task.attempt, 'memory' ) }
}
// Wall-time request for BWA_MEM: 2 days multiplied by the attempt number.
// check_max is defined outside this hunk; presumably it caps the value at a pipeline-wide limit (confirm).
withName:BWA_MEM {
time = { check_max( 2.day * task.attempt, 'time' ) }
}
// Wall-time request for SAMBLASTER: 20 hours multiplied by the attempt number, passed through check_max.
withName:SAMBLASTER {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
Expand Down
20 changes: 20 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
"[email protected]:PlantandFoodResearch/nxf-modules.git": {
"modules": {
"pfr": {
"bwa/index": {
"branch": "main",
"git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
"installed_by": ["fastq_bwa_mem_samblaster"]
},
"bwa/mem": {
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fastq_bwa_mem_samblaster"]
},
"cat/cat": {
"branch": "main",
"git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
Expand Down Expand Up @@ -59,6 +69,11 @@
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fasta_ltrretriever_lai"]
},
"samblaster": {
"branch": "main",
"git_sha": "73358a6712178b9a67c39f92e65e8144b5880eae",
"installed_by": ["fastq_bwa_mem_samblaster"]
}
}
},
Expand All @@ -69,6 +84,11 @@
"git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
"installed_by": ["subworkflows"]
},
"fastq_bwa_mem_samblaster": {
"branch": "main",
"git_sha": "9639ac9a556898d0f0e8592bff24585c33326458",
"installed_by": ["subworkflows"]
},
"gff3_validate": {
"branch": "main",
"git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9",
Expand Down
19 changes: 19 additions & 0 deletions modules/local/agp2assembly.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Runs agp2assembly.py on an AGP file and publishes the resulting
// "<tag>.agp.assembly" file under <outdir>/hic/assembly.
process AGP2ASSEMBLY {
    tag "$sample_id_on_tag"
    label 'process_single'

    container "docker.io/gallvp/juicebox_scripts:a7ae991_ps"
    publishDir "${params.outdir}/hic/assembly", mode:'copy'

    input:
    tuple val(sample_id_on_tag), path(agp_file)

    output:
    tuple val(sample_id_on_tag), path("*.agp.assembly"), emit: assembly

    script:
    // The output is named after the part of the sample id that follows the
    // last ".on."; the id is quoted so the shell does not word-split it.
    """
    assembly_tag=\$(echo "$sample_id_on_tag" | sed 's/.*\\.on\\.//g')
    agp2assembly.py $agp_file "\${assembly_tag}.agp.assembly"
    """

    stub:
    // Creates an empty output with the same name as the real run, matching
    // the stub blocks of the sibling processes.
    """
    assembly_tag=\$(echo "$sample_id_on_tag" | sed 's/.*\\.on\\.//g')
    touch "\${assembly_tag}.agp.assembly"
    """
}
25 changes: 25 additions & 0 deletions modules/local/assembly2bedpe.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Converts an assembly file to BEDPE by capturing the standard output of
// assembly2bedpe.py, and publishes the result under <outdir>/hic/bedpe.
process ASSEMBLY2BEDPE {
tag "$sample_id_on_tag"
label 'process_single'

container "docker.io/gallvp/python3npkgs:v0.4"
publishDir "${params.outdir}/hic/bedpe", mode:'copy'

input:
tuple val(sample_id_on_tag), path(agp_assembly_file)

output:
tuple val(sample_id_on_tag), path("*.assembly.bedpe"), emit: bedpe

// The output file is named after the part of the sample id that follows the last ".on.".
script:
"""
assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
assembly2bedpe.py $agp_assembly_file > "\${assembly_tag}.assembly.bedpe"
"""

// Stub: creates an empty file with the same name as the real output.
stub:
"""
assembly_tag=\$(echo $sample_id_on_tag | sed 's/.*\\.on\\.//g')
touch "\${assembly_tag}.assembly.bedpe"
"""
}
19 changes: 19 additions & 0 deletions modules/local/hic2html.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Renders the HTML page for a .hic file by capturing the standard output of
// hic2html.py, and publishes it under <outdir>/hic.
process HIC2HTML {
    tag "$sample_id_on_tag"
    label 'process_single'

    container "docker.io/gallvp/python3npkgs:v0.4"
    publishDir "${params.outdir}/hic", mode: 'copy'

    input:
    tuple val(sample_id_on_tag), path(hic_file)

    output:
    path "*.html", emit: html

    script:
    // The page is named after the input file with its last extension
    // replaced by ".html".
    """
    file_name="$hic_file"
    hic2html.py "$hic_file" > "\${file_name%.*}.html"
    """

    stub:
    // Creates an empty page with the same name as the real run, matching
    // the stub blocks of the sibling processes.
    """
    file_name="$hic_file"
    touch "\${file_name%.*}.html"
    """
}
37 changes: 37 additions & 0 deletions modules/local/hicqc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Runs hic_qc.py on a BAM file, publishes the PDF output under
// <outdir>/hic/hic_qc and records the tool version in versions.yml.
process HICQC {
tag "$meta.id"
label 'process_single'

publishDir "${params.outdir}/hic/hic_qc", mode:'copy'
container "docker.io/gallvp/hic_qc:6881c33_ps"

input:
tuple val(meta), path(bam)

output:
tuple val(meta), path("*.pdf") , emit: pdf
path "versions.yml" , emit: versions

// Output files are prefixed with the sample id taken from meta.id.
// NOTE(review): -n 10000000 is presumably the number of reads to examine; confirm against the hic_qc.py usage text.
script:
"""
hic_qc.py \\
-n 10000000 \\
-b $bam \\
--outfile_prefix "$meta.id"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
hic_qc.py: \$(hic_qc.py --version)
END_VERSIONS
"""

// Stub: creates an empty PDF named after meta.id and still queries the tool for its version.
stub:
"""
touch "${meta.id}.pdf"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
hic_qc.py: \$(hic_qc.py --version)
END_VERSIONS
"""
}
22 changes: 22 additions & 0 deletions modules/local/juicer_sort.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Sorts a links text file on its 2nd and then its 6th field with GNU sort,
// using as many sort threads as the task has CPUs.
process JUICER_SORT {
    tag "$sample_id_on_tag"
    label 'process_high'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
        'nf-core/ubuntu:20.04' }"

    input:
    tuple val(sample_id_on_tag), path(out_links_txt)

    output:
    tuple val(sample_id_on_tag), path("*sorted.links.txt"), emit: links

    script:
    """
    sort --parallel=${task.cpus} \\
        -k2,2 -k6,6 \\
        $out_links_txt \\
        > out.sorted.links.txt
    """

    stub:
    // Creates an empty output with the same name as the real run, matching
    // the stub blocks of the sibling processes.
    """
    touch out.sorted.links.txt
    """
}
18 changes: 18 additions & 0 deletions modules/local/makeagpfromfasta.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Runs makeAgpFromFasta.py on an assembly FASTA to produce an AGP file.
process MAKEAGPFROMFASTA {
    tag "$sample_id_on_tag"
    label 'process_single'

    container "docker.io/gallvp/juicebox_scripts:a7ae991_ps"

    input:
    tuple val(sample_id_on_tag), path(assembly_fasta)

    output:
    tuple val(sample_id_on_tag), path("*.agp"), emit: agp

    script:
    // "%%.*" removes everything from the FIRST dot of the file name, so
    // "a.b.fasta" yields "a.agp".
    """
    file_name="$assembly_fasta"
    makeAgpFromFasta.py $assembly_fasta "\${file_name%%.*}.agp"
    """

    stub:
    // Creates an empty output with the same name as the real run, matching
    // the stub blocks of the sibling processes.
    """
    file_name="$assembly_fasta"
    touch "\${file_name%%.*}.agp"
    """
}
19 changes: 19 additions & 0 deletions modules/local/matlock_bam2_juicer.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Runs "matlock bam2 juicer" on a BAM file and writes the result to
// out.links.txt.
process MATLOCK_BAM2_JUICER {
    tag "$sample_id_on_tag"
    label 'process_single'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/matlock:20181227--h4b03ef3_3':
        'biocontainers/matlock:20181227--h4b03ef3_3' }"

    input:
    tuple val(sample_id_on_tag), path(hic_bam_scaffolds)

    output:
    // Named like the output of JUICER_SORT; the channel is still reachable
    // through the unnamed ".out" accessor, so existing callers keep working.
    tuple val(sample_id_on_tag), path("out.links.txt"), emit: links

    script:
    """
    matlock bam2 juicer $hic_bam_scaffolds out.links.txt
    """

    stub:
    // Creates an empty output with the same name as the real run, matching
    // the stub blocks of the sibling processes.
    """
    touch out.links.txt
    """
}
Loading

0 comments on commit f0bc5a6

Please sign in to comment.