From 2936122cc1522c6fa52d777bfc140cee38d784d7 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 22 Jul 2024 22:17:00 +0200 Subject: [PATCH 1/7] Install macs3 module --- modules.json | 5 + .../nf-core/macs3/callpeak/environment.yml | 9 + modules/nf-core/macs3/callpeak/main.nf | 70 ++++ modules/nf-core/macs3/callpeak/meta.yml | 73 ++++ .../nf-core/macs3/callpeak/tests/bam.config | 5 + .../nf-core/macs3/callpeak/tests/bed.config | 5 + .../nf-core/macs3/callpeak/tests/main.nf.test | 113 ++++++ .../macs3/callpeak/tests/main.nf.test.snap | 358 ++++++++++++++++++ 8 files changed, 638 insertions(+) create mode 100644 modules/nf-core/macs3/callpeak/environment.yml create mode 100644 modules/nf-core/macs3/callpeak/main.nf create mode 100644 modules/nf-core/macs3/callpeak/meta.yml create mode 100644 modules/nf-core/macs3/callpeak/tests/bam.config create mode 100644 modules/nf-core/macs3/callpeak/tests/bed.config create mode 100644 modules/nf-core/macs3/callpeak/tests/main.nf.test create mode 100644 modules/nf-core/macs3/callpeak/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index f4d4950b..33567a0b 100644 --- a/modules.json +++ b/modules.json @@ -90,6 +90,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "macs3/callpeak": { + "branch": "master", + "git_sha": "6046db922ee6c425a4dff26eeda122dffc3df9c0", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", diff --git a/modules/nf-core/macs3/callpeak/environment.yml b/modules/nf-core/macs3/callpeak/environment.yml new file mode 100644 index 00000000..d9caa561 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "macs3_callpeak" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::macs3=3.0.1" diff --git 
a/modules/nf-core/macs3/callpeak/main.nf b/modules/nf-core/macs3/callpeak/main.nf new file mode 100644 index 00000000..53a1a200 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/main.nf @@ -0,0 +1,70 @@ + +process MACS3_CALLPEAK { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/macs3:3.0.1--py311h0152c62_3': + 'biocontainers/macs3:3.0.1--py311h0152c62_3' }" + + input: + tuple val(meta), path(ipbam), path(controlbam) + val macs3_gsize + + output: + tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak + tuple val(meta), path("*.xls") , emit: xls + path "versions.yml" , emit: versions + + tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped + tuple val(meta), path("*.bed") , optional:true, emit: bed + tuple val(meta), path("*.bdg") , optional:true, emit: bdg + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + def format = meta.single_end ? 'BAM' : 'BAMPE' + def control = controlbam ? 
"--control $controlbam" : '' + if(args_list.contains('--format')){ + def id = args_list.findIndexOf{it=='--format'} + format = args_list[id+1] + args_list.remove(id+1) + args_list.remove(id) + } + """ + macs3 \\ + callpeak \\ + ${args_list.join(' ')} \\ + --gsize $macs3_gsize \\ + --format $format \\ + --name $prefix \\ + --treatment $ipbam \\ + $control + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macs3: \$(macs3 --version | sed -e "s/macs3 //g") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.gappedPeak + touch ${prefix}.bed + touch ${prefix}.bdg + touch ${prefix}.narrowPeak + touch ${prefix}.xls + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macs3: \$(macs3 --version | sed -e "s/macs3 //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/macs3/callpeak/meta.yml b/modules/nf-core/macs3/callpeak/meta.yml new file mode 100644 index 00000000..b1c7cade --- /dev/null +++ b/modules/nf-core/macs3/callpeak/meta.yml @@ -0,0 +1,73 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "macs3_callpeak" +description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments +keywords: + - alignment + - atac-seq + - chip-seq + - peak-calling +tools: + - macs3: + description: "Model Based Analysis for ChIP-Seq data" + homepage: "https://macs3-project.github.io/MACS/" + documentation: "https://macs3-project.github.io/MACS/" + tool_dev_url: "https://github.com/macs3-project/MACS/" + doi: "10.1101/496521" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample_1', single_end:false ]` + - ipbam: + type: file + description: The ChIP-seq treatment file + - controlbam: + type: file + description: The control file + - macs3_gsize: + type: string + description: | + Effective genome size. 
It can be 1.0e+9 or 1000000000, + or shortcuts:'hs' for human (2,913,022,398), 'mm' for mouse + (2,652,783,500), 'ce' for C. elegans (100,286,401) + and 'dm' for fruitfly (142,573,017), Default:hs. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + - peak: + type: file + description: BED file containing annotated peaks + pattern: "*.{narrowPeak,broadPeak}" + - xls: + type: file + description: xls file containing annotated peaks + pattern: "*.xls" + - gapped: + type: file + description: Optional BED file containing gapped peak + pattern: "*.gappedPeak" + - bed: + type: file + description: Optional BED file containing peak summits locations for every peak + pattern: "*.bed" + - bdg: + type: file + description: Optional bedGraph files for input and treatment input samples + pattern: "*.bdg" + +authors: + - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/modules/nf-core/macs3/callpeak/tests/bam.config b/modules/nf-core/macs3/callpeak/tests/bam.config new file mode 100644 index 00000000..217e3107 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/bam.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS3_CALLPEAK' { + ext.args = '--qval 0.1' + } +} \ No newline at end of file diff --git a/modules/nf-core/macs3/callpeak/tests/bed.config b/modules/nf-core/macs3/callpeak/tests/bed.config new file mode 100644 index 00000000..19444006 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/bed.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS3_CALLPEAK' { + ext.args = '--format BED --qval 10 --nomodel --extsize 200' + } +} \ No newline at end of file diff --git a/modules/nf-core/macs3/callpeak/tests/main.nf.test b/modules/nf-core/macs3/callpeak/tests/main.nf.test new file mode 100644 index 00000000..4338c96b --- /dev/null +++ 
b/modules/nf-core/macs3/callpeak/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process MACS3_CALLPEAK" + script "../main.nf" + process "MACS3_CALLPEAK" + + tag "modules" + tag "modules_nfcore" + tag "macs3" + tag "macs3/callpeak" + + test("homo_sapiens - callpeak - bed") { + + when { + config "./bed.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed', checkIfExists: true) ], + [] + ] + input[1] = 4000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - callpeak - bam") { + + when { + config "./bam.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - callpeak - control - bam") { + + when { + config "./bam.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam', checkIfExists: true) ] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap b/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap new file mode 100644 index 00000000..d6d98292 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap @@ -0,0 +1,358 @@ +{ + "homo_sapiens - callpeak - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,221852e4639574d2f53cf1917efa4922" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,221852e4639574d2f53cf1917efa4922" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:31.629715" + }, + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + 
"versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bdg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gapped": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:58.589844" + }, + "homo_sapiens - callpeak - control - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,9fce04613bdc9c8372a9f884aa0d5aa6" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,9fce04613bdc9c8372a9f884aa0d5aa6" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:44.063426" + }, + "homo_sapiens - callpeak - bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,14efbf7137623df5aaf282b506ac9601" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,28833eeb7816688f0d698f51670be946" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,28833eeb7816688f0d698f51670be946" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,14efbf7137623df5aaf282b506ac9601" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:16.697163" + } +} \ No newline at end of file From dabbe8cf77c6a0a06358f14c23d53f40481cffe5 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 22 Jul 2024 22:31:48 +0200 Subject: [PATCH 2/7] Remove macs2/callpeak module --- modules.json | 198 ++++++++++++++++++------ modules/nf-core/macs2/callpeak/main.nf | 53 ------- modules/nf-core/macs2/callpeak/meta.yml | 63 -------- 3 files changed, 148 insertions(+), 166 deletions(-) 
delete mode 100644 modules/nf-core/macs2/callpeak/main.nf delete mode 100644 modules/nf-core/macs2/callpeak/meta.yml diff --git a/modules.json b/modules.json index 33567a0b..a1bfa4c7 100644 --- a/modules.json +++ b/modules.json @@ -8,177 +8,252 @@ "bowtie2/align": { "branch": "master", "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", - "installed_by": ["fastq_align_bowtie2", "modules"] + "installed_by": [ + "fastq_align_bowtie2", + "modules" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["fastq_align_bwa", "modules"] + "installed_by": [ + "fastq_align_bwa", + "modules" + ] }, "chromap/chromap": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["fastq_align_chromap", "modules"] + "installed_by": [ + "fastq_align_chromap", + "modules" + ] }, "chromap/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/computematrix": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": 
["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] + "installed_by": [ + "fastq_fastqc_umitools_trimgalore", + "modules" + ] }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "khmer/uniquekmers": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "macs2/callpeak": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "macs3/callpeak": { "branch": "master", "git_sha": "6046db922ee6c425a4dff26eeda122dffc3df9c0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "phantompeakqualtools": { "branch": "master", "git_sha": "2dfe9afa90fefc70e320140e5f41287f01f324b0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": 
"ec833ac4c29db6005d18baccf3306f557c46b006", - "installed_by": ["bam_markduplicates_picard", "modules"] + "installed_by": [ + "bam_markduplicates_picard", + "modules" + ] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "preseq/lcextrap": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "modules"] + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools", + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": ["bam_sort_stats_samtools", "modules"] + "installed_by": [ + "bam_sort_stats_samtools", + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, "subread/featurecounts": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "trimgalore": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] + "installed_by": [ + 
"fastq_fastqc_umitools_trimgalore", + "modules" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "umitools/extract": { "branch": "master", "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "fastq_fastqc_umitools_trimgalore" + ] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untarfiles": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -187,55 +262,78 @@ "bam_markduplicates_picard": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_sort_stats_samtools": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa", "fastq_align_chromap"] + "installed_by": [ + "fastq_align_bowtie2", + "fastq_align_bwa", + "fastq_align_chromap" + ] }, "bam_stats_samtools": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools" + ] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_bwa": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_chromap": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": 
["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/macs2/callpeak/main.nf b/modules/nf-core/macs2/callpeak/main.nf deleted file mode 100644 index cbef7838..00000000 --- a/modules/nf-core/macs2/callpeak/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process MACS2_CALLPEAK { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::macs2=2.2.7.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/macs2:2.2.7.1--py38h4a8c8d9_3' : - 'biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" - - input: - tuple val(meta), path(ipbam), path(controlbam) - val macs2_gsize - - output: - tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak - tuple val(meta), path("*.xls") , emit: xls - path "versions.yml" , emit: versions - - tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped - tuple val(meta), path("*.bed") , optional:true, emit: bed - tuple val(meta), path("*.bdg") , optional:true, emit: bdg - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def args_list = args.tokenize() - def format = meta.single_end ? 'BAM' : 'BAMPE' - def control = controlbam ? "--control $controlbam" : '' - if(args_list.contains('--format')){ - def id = args_list.findIndexOf{it=='--format'} - format = args_list[id+1] - args_list.remove(id+1) - args_list.remove(id) - } - """ - macs2 \\ - callpeak \\ - ${args_list.join(' ')} \\ - --gsize $macs2_gsize \\ - --format $format \\ - --name $prefix \\ - --treatment $ipbam \\ - $control - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - macs2: \$(macs2 --version | sed -e "s/macs2 //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/macs2/callpeak/meta.yml b/modules/nf-core/macs2/callpeak/meta.yml deleted file mode 100644 index 6e2bc1db..00000000 --- a/modules/nf-core/macs2/callpeak/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: macs2_callpeak -description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments -keywords: - - alignment - - atac-seq - - chip-seq - - peak-calling -tools: - - macs2: - description: Model Based Analysis for ChIP-Seq data - - documentation: https://docs.csc.fi/apps/macs2/ - tool_dev_url: https://github.com/macs3-project/MACS - doi: "10.1101/496521" - licence: ["BSD"] - -input: - - meta: - type: map - description: | - Groovy Map 
containing sample information - e.g. [ id:'test', single_end:false ] - - ipbam: - type: file - description: The ChIP-seq treatment file - - controlbam: - type: file - description: The control file - - macs2_gsize: - type: string - description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), - 'mm' for mouse (1.87e9), 'ce' for C. elegans (9e7) and 'dm' for fruitfly (1.2e8) - -output: - - versions: - type: file - description: File containing software version - pattern: "versions.yml" - - peak: - type: file - description: BED file containing annotated peaks - pattern: "*.gappedPeak,*.narrowPeak}" - - xls: - type: file - description: xls file containing annotated peaks - pattern: "*.xls" - - gapped: - type: file - description: Optional BED file containing gapped peak - pattern: "*.gappedPeak" - - bed: - type: file - description: Optional BED file containing peak summits locations for every peak - pattern: "*.bed" - - bdg: - type: file - description: Optional bedGraph files for input and treatment input samples - pattern: "*.bdg" - -authors: - - "@ntoda03" - - "@JoseEspinosa" - - "@jianhong" From 01623d6f403ee03d188dd79bddd04f217b1215a8 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 23 Jul 2024 11:13:34 +0200 Subject: [PATCH 3/7] Update all code using macs2 to use macs3, also docs --- CITATIONS.md | 2 +- README.md | 2 +- assets/multiqc/frip_score_header.txt | 4 +- assets/multiqc/peak_count_header.txt | 4 +- assets/multiqc_config.yml | 2 +- assets/schema_input.json | 19 +++++- ...erged_expand.py => macs3_merged_expand.py} | 10 +-- bin/{plot_macs2_qc.r => plot_macs3_qc.r} | 2 +- conf/modules.config | 34 +++++----- docs/output.md | 24 +++---- modules/local/igv.nf | 6 +- ...{macs2_consensus.nf => macs3_consensus.nf} | 4 +- modules/local/multiqc.nf | 8 +-- .../{plot_macs2_qc.nf => plot_macs3_qc.nf} | 4 +- nextflow_schema.json | 20 +++--- .../utils_nfcore_chipseq_pipeline/main.nf | 4 +- test.csv | 7 ++ tower.yml | 10 +-- 
workflows/chipseq.nf | 66 +++++++++---------- 19 files changed, 127 insertions(+), 105 deletions(-) rename bin/{macs2_merged_expand.py => macs3_merged_expand.py} (96%) rename bin/{plot_macs2_qc.r => plot_macs3_qc.r} (99%) rename modules/local/{macs2_consensus.nf => macs3_consensus.nf} (98%) rename modules/local/{plot_macs2_qc.nf => plot_macs3_qc.nf} (96%) create mode 100644 test.csv diff --git a/CITATIONS.md b/CITATIONS.md index f8160c8f..d75a5154 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -46,7 +46,7 @@ > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. -- [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) +- [MACS3](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. diff --git a/README.md b/README.md index 2dcb70e1..01a8a10c 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ You can find numerous talks on the [nf-core events page](https://nf-co.re/events 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) 6. Calculate genome-wide IP enrichment relative to control ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) 7. Calculate strand cross-correlation peak and ChIP-seq quality measures including NSC and RSC ([`phantompeakqualtools`](https://github.com/kundajelab/phantompeakqualtools)) - 8. 
Call broad/narrow peaks ([`MACS2`](https://github.com/macs3-project/MACS)) + 8. Call broad/narrow peaks ([`MACS3`](https://github.com/macs3-project/MACS)) 9. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html)) 10. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/)) 11. Count reads in consensus peaks ([`featureCounts`](http://bioinf.wehi.edu.au/featureCounts/)) diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt index 82902115..b8c35cd7 100644 --- a/assets/multiqc/frip_score_header.txt +++ b/assets/multiqc/frip_score_header.txt @@ -1,7 +1,7 @@ #id: 'frip_score' -#section_name: 'MERGED LIB: MACS2 FRiP score' +#section_name: 'MERGED LIB: MACS3 FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall -# into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. +# into the MACS3 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." 
#plot_type: 'bargraph' #anchor: 'frip_score' diff --git a/assets/multiqc/peak_count_header.txt b/assets/multiqc/peak_count_header.txt index aa4dd346..66f6d2d9 100644 --- a/assets/multiqc/peak_count_header.txt +++ b/assets/multiqc/peak_count_header.txt @@ -1,7 +1,7 @@ #id: 'peak_count' -#section_name: 'MERGED LIB: MACS2 peak count' +#section_name: 'MERGED LIB: MACS3 peak count' #description: "is calculated from total number of peaks called by -# MACS2" +# MACS3" #plot_type: 'bargraph' #anchor: 'peak_count' #pconfig: diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 14a9f451..4f32837d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -72,7 +72,7 @@ module_order: anchor: "mlib_featurecounts" info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks." path_filters: - - "./macs2/featurecounts/*.summary" + - "./macs3/featurecounts/*.summary" report_section_order: peak_count: diff --git a/assets/schema_input.json b/assets/schema_input.json index bc2fa467..fb41210f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -33,15 +33,30 @@ } ] }, + "replicate": { + "type": "integer", + "errorMessage": "Replicate id not an integer!", + "meta": ["replicate"] + }, "antibody": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Antibody entry cannot contain spaces" + "errorMessage": "Antibody entry cannot contain spaces", + "dependentRequired": ["control"], + "meta": ["antibody"] }, "control": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Control entry cannot contain spaces" + "errorMessage": "Control entry cannot contain spaces", + "dependentRequired": ["antibody", "control_replicate"], + "meta": ["control"] }, + "control_replicate": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Control replicate entry cannot contain spaces", + "meta": ["control_replicate"] } }, "required": ["sample", "fastq_1"] diff --git 
a/bin/macs2_merged_expand.py b/bin/macs3_merged_expand.py similarity index 96% rename from bin/macs2_merged_expand.py rename to bin/macs3_merged_expand.py index 28ffb087..64dc7770 100755 --- a/bin/macs2_merged_expand.py +++ b/bin/macs3_merged_expand.py @@ -17,15 +17,15 @@ ############################################ Description = "Add sample boolean files and aggregate columns from merged MACS narrow or broad peak file." -Epilog = """Example usage: python macs2_merged_expand.py --is_narrow_peak --min_replicates 1""" +Epilog = """Example usage: python macs3_merged_expand.py --is_narrow_peak --min_replicates 1""" argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument("MERGED_INTERVAL_FILE", help="Merged MACS2 interval file created using linux sort and mergeBed.") +argParser.add_argument("MERGED_INTERVAL_FILE", help="Merged MACS3 interval file created using linux sort and mergeBed.") argParser.add_argument( "SAMPLE_NAME_LIST", - help="Comma-separated list of sample names as named in individual MACS2 broadPeak/narrowPeak output file e.g. SAMPLE_R1 for SAMPLE_R1_peak_1.", + help="Comma-separated list of sample names as named in individual MACS3 broadPeak/narrowPeak output file e.g. 
SAMPLE_R1 for SAMPLE_R1_peak_1.", ) argParser.add_argument("OUTFILE", help="Full path to output directory.") @@ -76,7 +76,7 @@ def makedir(path): ## sort -k1,1 -k2,2n | mergeBed -c 2,3,4,5,6,7,8,9,10 -o collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse > merged_peaks.txt -def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): +def macs3_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): makedir(os.path.dirname(OutFile)) combFreqDict = {} @@ -208,7 +208,7 @@ def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow ############################################ ############################################ -macs2_merged_expand( +macs3_merged_expand( MergedIntervalTxtFile=args.MERGED_INTERVAL_FILE, SampleNameList=args.SAMPLE_NAME_LIST.split(","), OutFile=args.OUTFILE, diff --git a/bin/plot_macs2_qc.r b/bin/plot_macs3_qc.r similarity index 99% rename from bin/plot_macs2_qc.r rename to bin/plot_macs3_qc.r index 5cf074de..e40a6837 100755 --- a/bin/plot_macs2_qc.r +++ b/bin/plot_macs3_qc.r @@ -20,7 +20,7 @@ library(scales) option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. 
Must be unique and in same order as peaks files input.", metavar="string"), make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string")) + make_option(c("-p", "--outprefix"), type="character", default='macs3_peakqc', help="Output prefix", metavar="string")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) diff --git a/conf/modules.config b/conf/modules.config index 59af9e66..8828f48f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -540,7 +540,7 @@ if (!params.skip_plot_fingerprint) { } process { - withName: 'MACS2_CALLPEAK' { + withName: 'MACS3_CALLPEAK' { ext.args = [ '--keep-dup all', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", @@ -551,7 +551,7 @@ process { ].join(' ').trim() publishDir = [ path: { [ - "${params.outdir}/${params.aligner}/merged_library/macs2", + "${params.outdir}/${params.aligner}/merged_library/macs3", params.narrow_peak? '/narrow_peak' : '/broad_peak' ].join('') }, mode: params.publish_dir_mode, @@ -562,14 +562,14 @@ process { withName: 'FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, enabled: false ] } withName: 'MULTIQC_CUSTOM_PEAKS' { publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -578,11 +578,11 @@ process { if (!params.skip_peak_annotation) { process { - withName: 'HOMER_ANNOTATEPEAKS_MACS2' { + withName: 'HOMER_ANNOTATEPEAKS_MACS3' { ext.args = '-gid' ext.prefix = { "${meta.id}_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -591,10 +591,10 @@ if (!params.skip_peak_annotation) { if (!params.skip_peak_qc) { process { - withName: 'PLOT_MACS2_QC' { - ext.args = '-o ./ -p macs2_peak' + withName: 'PLOT_MACS3_QC' { + ext.args = '-o ./ -p macs3_peak' publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -602,9 +602,9 @@ if (!params.skip_peak_annotation) { withName: 'PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' - ext.prefix = 'macs2_annotatePeaks' + ext.prefix = 'macs3_annotatePeaks' publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -615,11 +615,11 @@ if (!params.skip_peak_annotation) { if (!params.skip_consensus_peaks) { process { - withName: 'MACS2_CONSENSUS' { + withName: 'MACS3_CONSENSUS' { ext.when = { meta.multiple_groups || meta.replicates_exist } ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -629,7 +629,7 @@ if (!params.skip_consensus_peaks) { ext.args = '-F SAF -O --fracOverlap 0.2' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -642,7 +642,7 @@ if (!params.skip_consensus_peaks) { ext.args = '-gid' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -651,7 +651,7 @@ if (!params.skip_consensus_peaks) { withName: 'ANNOTATE_BOOLEAN_PEAKS' { ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -671,7 +671,7 @@ if (!params.skip_consensus_peaks) { ].join(' ').trim() ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}/deseq2" }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}/deseq2" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/docs/output.md b/docs/output.md index d8131c61..2f0ee320 100644 --- a/docs/output.md +++ b/docs/output.md @@ -188,21 +188,21 @@ The results from deepTools plotProfile gives you a quick visualisation for the g
Output files -- `/merged_library/macs2//` - - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in _narrowPeak_ or _broadPeak_ mode. +- `/merged_library/macs3//` + - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS3 output files - the files generated will depend on whether MACS3 has been run in _narrowPeak_ or _broadPeak_ mode. - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file. -- `/merged_library/macs2//qc/` - - `macs2_peak.plots.pdf`: QC plots for MACS2 peaks. - - `macs2_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. +- `/merged_library/macs3//qc/` + - `macs3_peak.plots.pdf`: QC plots for MACS3 peaks. + - `macs3_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. - `*.FRiP_mqc.tsv`, `*.peak_count_mqc.tsv`, `annotatepeaks.summary_mqc.tsv`: MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios. -> **NB:** `` in the directory structure above corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. +> **NB:** `` in the directory structure above corresponds to the type of peak that you have specified to call with MACS3 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. 
For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline.
-[MACS2](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS2. +[MACS3](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS3 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS3 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS3. -![MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) +![MultiQC - MACS3 total peak count plot](images/mqc_macs2_peak_count_plot.png) [HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation is not provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. _Annotation_, _Distance to TSS_ and _Nearest Promoter ID_. @@ -210,14 +210,14 @@ The results from deepTools plotProfile gives you a quick visualisation for the g Various QC plots per sample including number of peaks, fold-change distribution, [FRiP score](https://genome.cshlp.org/content/22/9/1813.full.pdf+html) and peak-to-gene feature annotation are also generated by the pipeline. 
Where possible these have been integrated into the MultiQC report. -![MultiQC - MACS2 peaks FRiP score plot](images/mqc_frip_score_plot.png) +![MultiQC - MACS3 peaks FRiP score plot](images/mqc_frip_score_plot.png) ### Create and quantify consensus set of peaks
Output files -- `/merged_library/macs2//consensus//` +- `/merged_library/macs3//consensus//` - `*.bed`: Consensus peak-set across all samples in BED format. - `*.saf`: Consensus peak-set across all samples in SAF format. Required by featureCounts for read quantification. - `*.featureCounts.txt`: Read counts across all samples relative to consensus peak-set. @@ -245,7 +245,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co
Output files -- `/merged_library/macs2//consensus//deseq2/` +- `/merged_library/macs3//consensus//deseq2/` - `*.sample.dists.txt`: Spreadsheet containing sample-to-sample distance across each consensus peak. - `*.plots.pdf`: File containing PCA and hierarchical clustering plots. - `*.dds.RData`: File containing R `DESeqDataSet` object generated by DESeq2, with either @@ -254,7 +254,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co `readRDS` to give user control of the eventual object name. - `*pca.vals.txt`: Matrix of values for the first 2 principal components. - `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. - - `/merged_library/macs2//consensus//sizeFactors/` + - `/merged_library/macs3//consensus//sizeFactors/` - `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample.
diff --git a/modules/local/igv.nf b/modules/local/igv.nf index 4352edae..182a339e 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -13,8 +13,8 @@ process IGV { val peak_dir path fasta path ("${aligner_dir}/merged_library/bigwig/*") - path ("${aligner_dir}/merged_library/macs2/${peak_dir}/*") - path ("${aligner_dir}/merged_library/macs2/${peak_dir}/consensus/*") + path ("${aligner_dir}/merged_library/macs3/${peak_dir}/*") + path ("${aligner_dir}/merged_library/macs3/${peak_dir}/consensus/*") path ("mappings/*") output: @@ -27,7 +27,7 @@ process IGV { task.ext.when == null || task.ext.when script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ - def consensus_dir = "${aligner_dir}/merged_library/macs2/${peak_dir}/consensus/*" + def consensus_dir = "${aligner_dir}/merged_library/macs3/${peak_dir}/consensus/*" """ find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; > bigwig.igv.txt find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; > peaks.igv.txt diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs3_consensus.nf similarity index 98% rename from modules/local/macs2_consensus.nf rename to modules/local/macs3_consensus.nf index 6b2006e4..520727a8 100644 --- a/modules/local/macs2_consensus.nf +++ b/modules/local/macs3_consensus.nf @@ -1,7 +1,7 @@ /* * Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts */ -process MACS2_CONSENSUS { +process MACS3_CONSENSUS { tag "$meta.id" label 'process_long' @@ -37,7 +37,7 @@ process MACS2_CONSENSUS { sort -T '.' 
-k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt - macs2_merged_expand.py \\ + macs3_merged_expand.py \\ ${prefix}.txt \\ ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peak_type}","")} \\ ${prefix}.boolean.txt \\ diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 31da0b07..db64f9d7 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -40,10 +40,10 @@ process MULTIQC { path ('phantompeakqualtools/*') path ('phantompeakqualtools/*') - path ('macs2/peaks/*') - path ('macs2/peaks/*') - path ('macs2/annotation/*') - path ('macs2/featurecounts/*') + path ('macs3/peaks/*') + path ('macs3/peaks/*') + path ('macs3/annotation/*') + path ('macs3/featurecounts/*') path ('deseq2/*') path ('deseq2/*') diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs3_qc.nf similarity index 96% rename from modules/local/plot_macs2_qc.nf rename to modules/local/plot_macs3_qc.nf index d514db46..1722cadd 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_macs3_qc.nf @@ -1,4 +1,4 @@ -process PLOT_MACS2_QC { +process PLOT_MACS3_QC { label 'process_medium' conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" @@ -22,7 +22,7 @@ process PLOT_MACS2_QC { def args = task.ext.args ?: '' def peak_type = is_narrow_peak ? 
'narrowPeak' : 'broadPeak' """ - plot_macs2_qc.r \\ + plot_macs3_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ $args diff --git a/nextflow_schema.json b/nextflow_schema.json index f51e0940..7189751b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -36,9 +36,9 @@ }, "read_length": { "type": "integer", - "description": "Read length used to calculate MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", + "description": "Read length used to calculate MACS3 genome size for peak calling if `--macs_gsize` isn't provided.", "fa_icon": "fas fa-chart-area", - "help_text": "Read length together with the genome fasta are used to calculate MACS2 genome size using the `khmer` program as explained [here](https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html#effective-genome-size). For all the genomes present in the `igenomes.config` the genome size has been already precomputed and the read length is then used to retrieve the corresponding value", + "help_text": "Read length together with the genome fasta are used to calculate MACS3 genome size using the `khmer` program as explained [here](https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html#effective-genome-size). For all the genomes present in the `igenomes.config` the genome size has been already precomputed and the read length is then used to retrieve the corresponding value", "enum": [50, 75, 100, 150, 200] }, "outdir": { @@ -142,8 +142,8 @@ }, "macs_gsize": { "type": "number", - "description": "Effective genome size parameter required by MACS2.", - "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. If using an iGenomes reference these have been provided for any of the genomes available in the igenomes.config, and for the following read lengths (50,75,100,150,200) that should be set using the `--read_length` parameter. 
For other genomes, if this parameter is not specified it will be inferred using the provided `--read_length` or otherwise the pipeline execution will stop with an error.", + "description": "Effective genome size parameter required by MACS3.", + "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS3. If using an iGenomes reference these have been provided for any of the genomes available in the igenomes.config, and for the following read lengths (50,75,100,150,200) that should be set using the `--read_length` parameter. For other genomes, if this parameter is not specified it will be inferred using the provided `--read_length` or otherwise the pipeline execution will stop with an error.", "fa_icon": "fas fa-arrows-alt-h" }, "blacklist": { @@ -292,14 +292,14 @@ "properties": { "narrow_peak": { "type": "boolean", - "description": "Run MACS2 in narrowPeak mode.", - "help_text": "MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode.", + "description": "Run MACS3 in narrowPeak mode.", + "help_text": "MACS3 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode.", "fa_icon": "fas fa-arrows-alt-h" }, "broad_cutoff": { "type": "number", "default": 0.1, - "description": "Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified.", + "description": "Specifies broad cutoff value for MACS3. 
Only used when --narrow_peak isn't specified.", "fa_icon": "fas fa-hand-scissors" }, "macs_fdr": { @@ -321,18 +321,18 @@ }, "save_macs_pileup": { "type": "boolean", - "description": "Instruct MACS2 to create bedGraph files normalised to signal per million reads.", + "description": "Instruct MACS3 to create bedGraph files normalised to signal per million reads.", "fa_icon": "fas fa-save" }, "skip_peak_qc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip MACS2 peak QC plot generation." + "description": "Skip MACS3 peak QC plot generation." }, "skip_peak_annotation": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip annotation of MACS2 and consensus peaks with HOMER." + "description": "Skip annotation of MACS3 and consensus peaks with HOMER." }, "skip_consensus_peaks": { "type": "boolean", diff --git a/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf index 14bac09f..37070807 100644 --- a/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf @@ -145,7 +145,7 @@ def validateInputParameters() { } if (!params.read_length && !params.macs_gsize) { - error ("Both '--read_length' and '--macs_gsize' not specified! Please specify either to infer MACS2 genome size for peak calling.") + error ("Both '--read_length' and '--macs_gsize' not specified! 
Please specify either to infer MACS3 genome size for peak calling.") } } @@ -269,6 +269,6 @@ def macsGsizeWarn(log) { log.warn "=============================================================================\n" + " --macs_gsize parameter has not been provided.\n" + " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" + - " Explicitly provide '--macs_gsize macs2_genome_size' to change this behaviour.\n" + + " Explicitly provide '--macs_gsize macs3_genome_size' to change this behaviour.\n" + "===================================================================================" } diff --git a/test.csv b/test.csv new file mode 100644 index 00000000..c871931f --- /dev/null +++ b/test.csv @@ -0,0 +1,7 @@ +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,1,SPT5,SPT5_INPUT,1 +SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_2.fastq.gz,2,SPT5,SPT5_INPUT,2 +SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,1,SPT5,SPT5_INPUT,1 +SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_2.fastq.gz,2,SPT5,SPT5_INPUT,2 +SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,1,,, 
+SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,2,,, diff --git a/tower.yml b/tower.yml index 5b1f5f90..5b981d67 100644 --- a/tower.yml +++ b/tower.yml @@ -1,9 +1,9 @@ reports: multiqc_report.html: display: "MultiQC HTML report" - macs2_peak.plots.pdf: - display: "All samples MACS2 peak QC PDF plots" - macs2_annotatePeaks.plots.pdf: + macs3_peak.plots.pdf: + display: "All samples MACS3 peak QC PDF plots" + macs3_annotatePeaks.plots.pdf: display: "All samples HOMER annotatePeaks.pl QC PDF plots" "*.consensus_peaks.plots.pdf": display: "Consensus peaks DESeq2 QC PDF plots" @@ -14,6 +14,6 @@ reports: "*.plotHeatmap.pdf": display: "Per-sample deepTools plotHeatmap PDF plots" "*_peaks.broadPeak": - display: "Per-sample MACS2 broadPeak file" + display: "Per-sample MACS3 broadPeak file" "*_peaks.narrowPeak": - display: "Per-sample MACS2 narrowPeak file" + display: "Per-sample MACS3 narrowPeak file" diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index 8f4a9e63..a5a255c8 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -9,9 +9,9 @@ // include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov' include { FRIP_SCORE } from '../modules/local/frip_score' -include { PLOT_MACS2_QC } from '../modules/local/plot_macs2_qc' +include { PLOT_MACS3_QC } from '../modules/local/plot_macs3_qc' include { PLOT_HOMER_ANNOTATEPEAKS } from '../modules/local/plot_homer_annotatepeaks' -include { MACS2_CONSENSUS } from '../modules/local/macs2_consensus' +include { MACS3_CONSENSUS } from '../modules/local/macs3_consensus' include { ANNOTATE_BOOLEAN_PEAKS } from '../modules/local/annotate_boolean_peaks' include { DESEQ2_QC } from '../modules/local/deseq2_qc' include { IGV } from '../modules/local/igv' @@ -50,10 +50,10 @@ include 
{ DEEPTOOLS_PLOTPROFILE } from '../modules/nf-core/deeptools/plo include { DEEPTOOLS_PLOTHEATMAP } from '../modules/nf-core/deeptools/plotheatmap/main' include { DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/deeptools/plotfingerprint/main' include { KHMER_UNIQUEKMERS } from '../modules/nf-core/khmer/uniquekmers/main' -include { MACS2_CALLPEAK } from '../modules/nf-core/macs2/callpeak/main' +include { MACS3_CALLPEAK } from '../modules/nf-core/macs3/callpeak/main' include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/subread/featurecounts/main' -include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_MACS2 } from '../modules/nf-core/homer/annotatepeaks/main' +include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_MACS3 } from '../modules/nf-core/homer/annotatepeaks/main' include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../modules/nf-core/homer/annotatepeaks/main' // @@ -467,29 +467,29 @@ workflow CHIPSEQ { .set { ch_ip_control_bam } // - // MODULE: Call peaks with MACS2 + // MODULE: Call peaks with MACS3 // - MACS2_CALLPEAK ( + MACS3_CALLPEAK ( ch_ip_control_bam, ch_macs_gsize ) - ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) + ch_versions = ch_versions.mix(MACS3_CALLPEAK.out.versions.first()) // - // Filter out samples with 0 MACS2 peaks called + // Filter out samples with 0 MACS3 peaks called // - MACS2_CALLPEAK + MACS3_CALLPEAK .out .peak .filter { meta, peaks -> peaks.size() > 0 } - .set { ch_macs2_peaks } + .set { ch_macs3_peaks } // Create channels: [ meta, ip_bam, peaks ] ch_ip_control_bam - .join(ch_macs2_peaks, by: [0]) + .join(ch_macs3_peaks, by: [0]) .map { it -> [ it[0], it[1], it[3] ] @@ -526,30 +526,30 @@ workflow CHIPSEQ { if (!params.skip_peak_annotation) { // - // MODULE: Annotate peaks with MACS2 + // MODULE: Annotate MACS3 peaks with HOMER // - HOMER_ANNOTATEPEAKS_MACS2 ( - ch_macs2_peaks, + HOMER_ANNOTATEPEAKS_MACS3 ( + ch_macs3_peaks, ch_fasta, ch_gtf ) - ch_versions = 
ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS2.out.versions.first()) + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS3.out.versions.first()) if (!params.skip_peak_qc) { // - // MODULE: MACS2 QC plots with R + // MODULE: MACS3 QC plots with R // - PLOT_MACS2_QC ( - ch_macs2_peaks.collect{it[1]}, + PLOT_MACS3_QC ( + ch_macs3_peaks.collect{it[1]}, params.narrow_peak ) - ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions) + ch_versions = ch_versions.mix(PLOT_MACS3_QC.out.versions) // // MODULE: Peak annotation QC plots with R // PLOT_HOMER_ANNOTATEPEAKS ( - HOMER_ANNOTATEPEAKS_MACS2.out.txt.collect{it[1]}, + HOMER_ANNOTATEPEAKS_MACS3.out.txt.collect{it[1]}, ch_peak_annotation_header, "_peaks.annotatePeaks.txt" ) @@ -561,14 +561,14 @@ workflow CHIPSEQ { // // Consensus peaks analysis // - ch_macs2_consensus_bed_lib = Channel.empty() - ch_macs2_consensus_txt_lib = Channel.empty() + ch_macs3_consensus_bed_lib = Channel.empty() + ch_macs3_consensus_txt_lib = Channel.empty() ch_deseq2_pca_multiqc = Channel.empty() ch_deseq2_clustering_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { // Create channels: [ meta , [ peaks ] ] // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] - ch_macs2_peaks + ch_macs3_peaks .map { meta, peak -> [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] @@ -595,20 +595,20 @@ workflow CHIPSEQ { // // MODULE: Generate consensus peaks across samples // - MACS2_CONSENSUS ( + MACS3_CONSENSUS ( ch_antibody_peaks, params.narrow_peak ) - ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed - ch_macs2_consensus_txt_lib = MACS2_CONSENSUS.out.txt - ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) + ch_macs3_consensus_bed_lib = MACS3_CONSENSUS.out.bed + ch_macs3_consensus_txt_lib = MACS3_CONSENSUS.out.txt + ch_versions = ch_versions.mix(MACS3_CONSENSUS.out.versions) if (!params.skip_peak_annotation) { // // MODULE: Annotate consensus peaks // HOMER_ANNOTATEPEAKS_CONSENSUS ( - 
MACS2_CONSENSUS.out.bed, + MACS3_CONSENSUS.out.bed, ch_fasta, ch_gtf ) @@ -618,7 +618,7 @@ workflow CHIPSEQ { // MODULE: Add boolean fields to annotated consensus peaks to aid filtering // ANNOTATE_BOOLEAN_PEAKS ( - MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), + MACS3_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), ) ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) } @@ -633,7 +633,7 @@ workflow CHIPSEQ { .set { ch_antibody_bams } // Create channels: [ meta, [ ip_bams ], saf ] - MACS2_CONSENSUS + MACS3_CONSENSUS .out .saf .map { @@ -679,9 +679,9 @@ workflow CHIPSEQ { params.narrow_peak ? 'narrow_peak' : 'broad_peak', ch_fasta, UCSC_BEDGRAPHTOBIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), - ch_macs2_peaks.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_txt_lib.collect{it[1]}.ifEmpty([]) + ch_macs3_peaks.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_bed_lib.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_txt_lib.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix(IGV.out.versions) } From f8514362914b0c488a5c0574280785549e8d3cc0 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 23 Jul 2024 11:13:43 +0200 Subject: [PATCH 4/7] Update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be4590e9..adf84b3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#385](https://github.com/nf-core/chipseq/issues/385)] - Fix `--save_unaligned` description in schema. - [[PR #392](https://github.com/nf-core/chipseq/pull/392)] - Adding line numbers to warnings/errors messages in `bin/check_samplesheet.py`. - [[#396](https://github.com/nf-core/chipseq/issues/396)] - Check that samplesheet samples IDs do only have alphanumeric characters, dots, dashes or underscores. 
+- [[#378](https://github.com/nf-core/chipseq/issues/378)] - Switch from macs2 to macs3. ### Software dependencies @@ -35,6 +36,8 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | Dependency | Old version | New version | | ---------- | ----------- | ----------- | | `chromap` | 0.2.1 | 0.2.4 | +| `macs2` | 2.2.7.1 | | +| `macs3` | | 3.0.1 | | `multiqc` | 1.13 | 1.14 | | `picard` | 2.27.4 | 3.0.0 | | `samtools` | 1.15.1 | 1.17 | From 0191a6fa4d1c3089dd4d79bb7c97379e696db68f Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 23 Jul 2024 11:26:31 +0200 Subject: [PATCH 5/7] Update macs3/callpeak module --- modules.json | 195 ++++++------------------ modules/nf-core/macs3/callpeak/meta.yml | 2 +- 2 files changed, 47 insertions(+), 150 deletions(-) diff --git a/modules.json b/modules.json index a1bfa4c7..e8c2f7aa 100644 --- a/modules.json +++ b/modules.json @@ -8,252 +8,172 @@ "bowtie2/align": { "branch": "master", "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", - "installed_by": [ - "fastq_align_bowtie2", - "modules" - ] + "installed_by": ["fastq_align_bowtie2", "modules"] }, "bowtie2/build": { "branch": "master", "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "fastq_align_bwa", - "modules" - ] + "installed_by": ["fastq_align_bwa", "modules"] }, "chromap/chromap": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "fastq_align_chromap", - "modules" - ] + "installed_by": ["fastq_align_chromap", "modules"] }, "chromap/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + 
"installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/computematrix": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "fastq_fastqc_umitools_trimgalore", - "modules" - ] + "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "khmer/uniquekmers": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "macs3/callpeak": { "branch": "master", - "git_sha": "6046db922ee6c425a4dff26eeda122dffc3df9c0", - "installed_by": [ - "modules" - ] + "git_sha": "3a3f471ed42f640115b2661edee5b258e6a254c1", + "installed_by": ["modules"] }, 
"multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "phantompeakqualtools": { "branch": "master", "git_sha": "2dfe9afa90fefc70e320140e5f41287f01f324b0", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", - "installed_by": [ - "bam_markduplicates_picard", - "modules" - ] + "installed_by": ["bam_markduplicates_picard", "modules"] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "preseq/lcextrap": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": [ - "bam_markduplicates_picard", - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": [ - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, 
"samtools/stats": { "branch": "master", "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "subread/featurecounts": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "trimgalore": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "fastq_fastqc_umitools_trimgalore", - "modules" - ] + "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": [ - "fastq_fastqc_umitools_trimgalore" - ] + "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untarfiles": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -262,78 +182,55 @@ "bam_markduplicates_picard": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "bam_sort_stats_samtools": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": [ - "fastq_align_bowtie2", - "fastq_align_bwa", - "fastq_align_chromap" - ] + "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa", "fastq_align_chromap"] }, "bam_stats_samtools": { "branch": "master", "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", - "installed_by": [ - "bam_markduplicates_picard", - "bam_sort_stats_samtools" - ] + 
"installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_bwa": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_chromap": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/macs3/callpeak/meta.yml b/modules/nf-core/macs3/callpeak/meta.yml index b1c7cade..1603b8e2 100644 --- a/modules/nf-core/macs3/callpeak/meta.yml +++ b/modules/nf-core/macs3/callpeak/meta.yml @@ -28,7 +28,7 @@ input: - controlbam: type: file description: The control file - - macs2_gsize: + - macs3_gsize: type: string description: | Effective genome size. 
It can be 1.0e+9 or 1000000000, From b0d81e68c41df260005d880360d07206c7d4972d Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 23 Jul 2024 12:02:22 +0200 Subject: [PATCH 6/7] Update subworkflows using macs2 to macs3 after merge --- ...bam_peaks_call_qc_annotate_macs3_homer.nf} | 54 +++++++++---------- ...antify_qc_bedtools_featurecounts_deseq2.nf | 24 ++++----- workflows/chipseq.nf | 26 ++++----- 3 files changed, 52 insertions(+), 52 deletions(-) rename subworkflows/local/{bam_peaks_call_qc_annotate_macs2_homer.nf => bam_peaks_call_qc_annotate_macs3_homer.nf} (76%) diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf similarity index 76% rename from subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf rename to subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf index 5b7b9b85..69ecb99f 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf @@ -1,16 +1,16 @@ // -// Call peaks with MACS2, annotate with HOMER and perform downstream QC +// Call peaks with MACS3, annotate with HOMER and perform downstream QC // -include { MACS2_CALLPEAK } from '../../modules/nf-core/macs2/callpeak/main' +include { MACS3_CALLPEAK } from '../../modules/nf-core/macs3/callpeak/main' include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' include { FRIP_SCORE } from '../../modules/local/frip_score' include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' -include { PLOT_MACS2_QC } from '../../modules/local/plot_macs2_qc' +include { PLOT_MACS3_QC } from '../../modules/local/plot_macs3_qc' include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' -workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { +workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER { take: ch_bam // channel: [ val(meta), [ ip_bam ], [ control_bam ] ] 
ch_fasta // channel: [ fasta ] @@ -29,29 +29,29 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ch_versions = Channel.empty() // - // Call peaks with MACS2 + // Call peaks with MACS3 // - MACS2_CALLPEAK ( + MACS3_CALLPEAK ( ch_bam, macs_gsize ) - ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) + ch_versions = ch_versions.mix(MACS3_CALLPEAK.out.versions.first()) // - // Filter out samples with 0 MACS2 peaks called + // Filter out samples with 0 MACS3 peaks called // - MACS2_CALLPEAK + MACS3_CALLPEAK .out .peak .filter { meta, peaks -> peaks.size() > 0 } - .set { ch_macs2_peaks } + .set { ch_macs3_peaks } // Create channels: [ meta, ip_bam, peaks ] ch_bam - .join(ch_macs2_peaks, by: [0]) + .join(ch_macs3_peaks, by: [0]) .map { meta, ip_bam, control_bam, peaks -> [ meta, ip_bam, peaks ] @@ -86,8 +86,8 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ch_versions = ch_versions.mix(MULTIQC_CUSTOM_PEAKS.out.versions.first()) ch_homer_annotatepeaks = Channel.empty() - ch_plot_macs2_qc_txt = Channel.empty() - ch_plot_macs2_qc_pdf = Channel.empty() + ch_plot_macs3_qc_txt = Channel.empty() + ch_plot_macs3_qc_pdf = Channel.empty() ch_plot_homer_annotatepeaks_txt = Channel.empty() ch_plot_homer_annotatepeaks_pdf = Channel.empty() ch_plot_homer_annotatepeaks_tsv = Channel.empty() @@ -96,7 +96,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { // Annotate peaks with HOMER // HOMER_ANNOTATEPEAKS ( - ch_macs2_peaks, + ch_macs3_peaks, ch_fasta, ch_gtf ) @@ -105,15 +105,15 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { if (!skip_peak_qc) { // - // MACS2 QC plots with R + // MACS3 QC plots with R // - PLOT_MACS2_QC ( - ch_macs2_peaks.collect{it[1]}, + PLOT_MACS3_QC ( + ch_macs3_peaks.collect{it[1]}, is_narrow_peak ) - ch_plot_macs2_qc_txt = PLOT_MACS2_QC.out.txt - ch_plot_macs2_qc_pdf = PLOT_MACS2_QC.out.pdf - ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions) + ch_plot_macs3_qc_txt = PLOT_MACS3_QC.out.txt + ch_plot_macs3_qc_pdf = 
PLOT_MACS3_QC.out.pdf + ch_versions = ch_versions.mix(PLOT_MACS3_QC.out.versions) // // Peak annotation QC plots with R @@ -131,11 +131,11 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { } emit: - peaks = ch_macs2_peaks // channel: [ val(meta), [ peaks ] ] - xls = MACS2_CALLPEAK.out.xls // channel: [ val(meta), [ xls ] ] - gapped_peaks = MACS2_CALLPEAK.out.gapped // channel: [ val(meta), [ gapped_peak ] ] - bed = MACS2_CALLPEAK.out.bed // channel: [ val(meta), [ bed ] ] - bedgraph = MACS2_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ] + peaks = ch_macs3_peaks // channel: [ val(meta), [ peaks ] ] + xls = MACS3_CALLPEAK.out.xls // channel: [ val(meta), [ xls ] ] + gapped_peaks = MACS3_CALLPEAK.out.gapped // channel: [ val(meta), [ gapped_peak ] ] + bed = MACS3_CALLPEAK.out.bed // channel: [ val(meta), [ bed ] ] + bedgraph = MACS3_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ] frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] @@ -144,8 +144,8 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ] - plot_macs2_qc_txt = ch_plot_macs2_qc_txt // channel: [ txt ] - plot_macs2_qc_pdf = ch_plot_macs2_qc_pdf // channel: [ pdf ] + plot_macs3_qc_txt = ch_plot_macs3_qc_txt // channel: [ txt ] + plot_macs3_qc_pdf = ch_plot_macs3_qc_pdf // channel: [ pdf ] plot_homer_annotatepeaks_txt = ch_plot_homer_annotatepeaks_txt // channel: [ txt ] plot_homer_annotatepeaks_pdf = ch_plot_homer_annotatepeaks_pdf // channel: [ pdf ] diff --git a/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf index 6b9b8efe..294a0eec 100644 --- a/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf +++ b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf @@ -5,7 +5,7 @@ include { HOMER_ANNOTATEPEAKS } from 
'../../modules/nf-core/homer/annotatepeaks/main' include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts/main' -include { MACS2_CONSENSUS } from '../../modules/local/macs2_consensus' +include { MACS3_CONSENSUS } from '../../modules/local/macs3_consensus' include { ANNOTATE_BOOLEAN_PEAKS } from '../../modules/local/annotate_boolean_peaks' include { DESEQ2_QC } from '../../modules/local/deseq2_qc' @@ -54,18 +54,18 @@ workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 { // // Generate consensus peaks across samples // - MACS2_CONSENSUS ( + MACS3_CONSENSUS ( ch_antibody_peaks, is_narrow_peak ) - ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) + ch_versions = ch_versions.mix(MACS3_CONSENSUS.out.versions) // // Annotate consensus peaks // if (!skip_peak_annotation) { HOMER_ANNOTATEPEAKS ( - MACS2_CONSENSUS.out.bed, + MACS3_CONSENSUS.out.bed, ch_fasta, ch_gtf ) @@ -75,13 +75,13 @@ workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 { // MODULE: Add boolean fields to annotated consensus peaks to aid filtering // ANNOTATE_BOOLEAN_PEAKS ( - MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS.out.txt, by: [0]), + MACS3_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS.out.txt, by: [0]), ) ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) } // Create channels: [ meta, [ ip_bams ], saf ] - MACS2_CONSENSUS + MACS3_CONSENSUS .out .saf .map { @@ -134,12 +134,12 @@ workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 { } emit: - consensus_bed = MACS2_CONSENSUS.out.bed // channel: [ bed ] - consensus_saf = MACS2_CONSENSUS.out.saf // channel: [ saf ] - consensus_pdf = MACS2_CONSENSUS.out.pdf // channel: [ pdf ] - consensus_txt = MACS2_CONSENSUS.out.txt // channel: [ pdf ] - consensus_boolean_txt = MACS2_CONSENSUS.out.boolean_txt // channel: [ txt ] - consensus_intersect_txt = MACS2_CONSENSUS.out.intersect_txt // channel: [ txt ] + consensus_bed = MACS3_CONSENSUS.out.bed // channel: [ 
bed ] + consensus_saf = MACS3_CONSENSUS.out.saf // channel: [ saf ] + consensus_pdf = MACS3_CONSENSUS.out.pdf // channel: [ pdf ] + consensus_txt = MACS3_CONSENSUS.out.txt // channel: [ txt ] + consensus_boolean_txt = MACS3_CONSENSUS.out.boolean_txt // channel: [ txt ] + consensus_intersect_txt = MACS3_CONSENSUS.out.intersect_txt // channel: [ txt ] featurecounts_txt = SUBREAD_FEATURECOUNTS.out.counts // channel: [ txt ] featurecounts_summary = SUBREAD_FEATURECOUNTS.out.summary // channel: [ txt ] diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index 4b7fb453..491d5f53 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -22,7 +22,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { ALIGN_STAR } from '../subworkflows/local/align_star' include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf' include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' /* @@ -446,9 +446,9 @@ workflow CHIPSEQ { .set { ch_ip_control_bam } // - // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC + // SUBWORKFLOW: Call peaks with MACS3, annotate with HOMER and perform downstream QC // - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER ( + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER ( ch_ip_control_bam, ch_fasta, ch_gtf, @@ -461,7 +461,7 @@ workflow CHIPSEQ { params.skip_peak_annotation, params.skip_peak_qc ) - ch_versions = ch_versions.mix(BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.versions) + ch_versions =
ch_versions.mix(BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.versions) // // Consensus peaks analysis @@ -481,7 +481,7 @@ workflow CHIPSEQ { .set { ch_antibody_bams } BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 ( - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks, + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peaks, ch_antibody_bams, ch_fasta, ch_gtf, @@ -491,8 +491,8 @@ workflow CHIPSEQ { params.skip_peak_annotation, params.skip_deseq2_qc ) - ch_macs2_consensus_bed_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_bed - ch_macs2_consensus_txt_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_txt + ch_macs3_consensus_bed_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_bed + ch_macs3_consensus_txt_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_txt ch_subreadfeaturecounts_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.featurecounts_summary ch_deseq2_pca_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_pca_multiqc ch_deseq2_clustering_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_dists_multiqc @@ -508,9 +508,9 @@ workflow CHIPSEQ { params.narrow_peak ? 
'narrow_peak' : 'broad_peak', ch_fasta, BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC.out.bigwig.collect{it[1]}.ifEmpty([]), - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_txt_lib.collect{it[1]}.ifEmpty([]) + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peaks.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_bed_lib.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_txt_lib.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix(IGV.out.versions) } @@ -568,9 +568,9 @@ workflow CHIPSEQ { ch_multiqc_phantompeakqualtools_rsc_multiqc.collect{it[1]}.ifEmpty([]), ch_multiqc_phantompeakqualtools_correlation_multiqc.collect{it[1]}.ifEmpty([]), - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.frip_multiqc.collect{it[1]}.ifEmpty([]), - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]), - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.frip_multiqc.collect{it[1]}.ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), ch_subreadfeaturecounts_multiqc.collect{it[1]}.ifEmpty([]), ch_deseq2_pca_multiqc.collect().ifEmpty([]), From 7238956cb94112d1fcfb9151278eee330f51c203 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 23 Jul 2024 12:58:15 +0200 Subject: [PATCH 7/7] Address review comments --- conf/modules.config | 2 +- test.csv | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 test.csv diff --git a/conf/modules.config b/conf/modules.config index eb028a67..2251e0ff 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -575,7 +575,7 @@ process { if (!params.skip_peak_annotation) { process { - withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:HOMER_ANNOTATEPEAKS' { + 
withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}_peaks" } publishDir = [ diff --git a/test.csv b/test.csv deleted file mode 100644 index c871931f..00000000 --- a/test.csv +++ /dev/null @@ -1,7 +0,0 @@ -sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate -SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,1,SPT5,SPT5_INPUT,1 -SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_2.fastq.gz,2,SPT5,SPT5_INPUT,2 -SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,1,SPT5,SPT5_INPUT,1 -SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_2.fastq.gz,2,SPT5,SPT5_INPUT,2 -SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,1,,, -SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,2,,,