Skip to content

Commit

Permalink
Added fasta_busco_plot workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 21, 2024
1 parent 75faf2e commit 64eebf3
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 2 deletions.
10 changes: 9 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,22 @@ process {
]
}

withName: RUN_BUSCO {
// Publish settings for the BUSCO process: outputs go under <outdir>/busco
// using the pipeline-wide publish mode.
withName: BUSCO {
publishDir = [
path: { "${params.outdir}/busco" },
mode: params.publish_dir_mode,
// saveAs returns null for versions.yml, so that file is not published
saveAs: { filename -> filename.equals("versions.yml") ? null : filename }
]
}

// Publish settings for the BUSCO_PLOT process: outputs are published relative
// to the top-level <outdir>.
// NOTE(review): BUSCO_PLOT declares its output as 'busco/*.png', so the plot
// presumably lands in <outdir>/busco next to the BUSCO summaries — confirm.
withName: BUSCO_PLOT {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
// saveAs returns null for versions.yml, so that file is not published
saveAs: { filename -> filename.equals("versions.yml") ? null : filename }
]
}

withName: RUN_KRAKEN2 {
publishDir = [
path: { "${params.outdir}/kraken2" },
Expand Down
5 changes: 5 additions & 0 deletions lib/WorkflowMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ class WorkflowMain {
if (!params.ncbi_fcs_gx_skip && !params.ncbi_fcs_gx_db_path) {
Nextflow.error('ncbi_fcs_gx_db_path must be provided when executing NCBI FCS GX')
}

// Check for busco_lineage_datasets
if (!params.busco_skip && !params.busco_lineage_datasets) {
Nextflow.error('busco_lineage_datasets must be provided when executing BUSCO')
}
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
54 changes: 54 additions & 0 deletions modules/local/busco.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Runs BUSCO on one assembly FASTA against one lineage dataset.
// Emits the short-summary text file (renamed so its name ends with the lineage
// initials) as `summary`, and a versions.yml recording the BUSCO version as
// `versions`.
process BUSCO {
tag "${asm_tag}:${lineage_dataset}"
label 'process_high'

conda "bioconda::busco=5.2.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0':
'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }"

input:
// asm_tag is also used as the BUSCO output name (-o) and therefore as the
// directory holding the summary file
tuple val(asm_tag), path(fasta_file)
// Lineage dataset name passed to -l, e.g. 'fungi_odb10'
val lineage_dataset
// Value passed to BUSCO's -m option
val mode
// When falsy (e.g. []), --download_path is omitted from the command line
val download_path

output:
// File name matches the target of the `mv` in the script / the `touch` in the stub
path "${asm_tag}/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt" , emit: summary
path "versions.yml" , emit: versions

script:
// Optional CLI fragment; empty string when no download path was supplied
def lineages_path = download_path ? "--download_path ${download_path}" : ''
// Text before the first underscore of the lineage name ('fungi' for 'fungi_odb10').
// NOTE(review): declared without `def`; the `output:` path above interpolates
// this variable, which presumably requires it not to be a `def` local — confirm.
lineage_initials = "${lineage_dataset}".split("_")[0]

"""
busco \\
-m ${mode} \\
-o ${asm_tag} \\
-i $fasta_file \\
-l ${lineage_dataset} \\
--update-data \\
$lineages_path \\
-c ${task.cpus}
mv $asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}.txt \\
$asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
END_VERSIONS
"""

stub:
// Same derivation as in `script:` so the stub output name matches the declared output
lineage_initials = "${lineage_dataset}".split("_")[0]
// Creates an empty summary file with the final (renamed) file name plus versions.yml
"""
mkdir -p $asm_tag
touch $asm_tag/short_summary.specific.${lineage_dataset}.${asm_tag}_${lineage_initials}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
END_VERSIONS
"""
}
28 changes: 28 additions & 0 deletions modules/local/busco_plot.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Draws a single summary figure from all BUSCO short-summary files.
//
// Input : every short-summary file, staged together inside ./busco
// Output: png      - the PNG file(s) written into ./busco by generate_plot.py
//         versions - versions.yml recording the BUSCO version used
//
// The `versions` output mirrors what the sibling BUSCO process emits, so the
// tool version used for plotting can be collated with the rest of the
// pipeline. The publishDir rule for this process already filters
// versions.yml out of the published results.
process BUSCO_PLOT {
    tag 'all summaries'
    label 'process_single'

    conda "bioconda::busco=5.2.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/busco:5.2.2--pyhdfd78af_0':
        'quay.io/biocontainers/busco:5.2.2--pyhdfd78af_0' }"

    input:
    // All summaries are staged under busco/ so generate_plot.py can be pointed
    // at one working directory
    path "short_summary.*", stageAs: 'busco/*'

    output:
    path 'busco/*.png'  , emit: png
    path "versions.yml" , emit: versions

    script:
    """
    generate_plot.py \\
        -wd ./busco

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
    END_VERSIONS
    """

    stub:
    // Produces an empty placeholder plot plus versions.yml, as the BUSCO stub does
    """
    mkdir -p busco
    touch busco/summary_plot.png

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
    END_VERSIONS
    """
}
2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"busco_lineage_datasets": {
"type": "string",
"help_text": "Each input assembly is assessed against each lineage. It should be provided as a space-separated list of lineages: 'fungi_odb10 microsporidia_odb10' ",
"pattern": "^(\\w+_odb10\\s)+\\w+_odb10$",
"pattern": "^(\\w+_odb10\\s)*\\w+_odb10$",
"description": "BUSCO lineages"
},
"busco_download_path": {
Expand Down
32 changes: 32 additions & 0 deletions subworkflows/local/fasta_busco_plot.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
include { BUSCO } from '../../modules/local/busco'
include { BUSCO_PLOT } from '../../modules/local/busco_plot'

// Subworkflow: run BUSCO for every (assembly, lineage) pairing it receives,
// then draw one plot from all resulting summaries.
workflow FASTA_BUSCO_PLOT {
    take:
    tuple_of_hap_file   // Channel
    lineage             // val
    mode                // val
    download_path       // val; Use [] to use work directory. Useful on AWS

    main:
    // Per-assembly, per-lineage BUSCO assessment
    BUSCO(tuple_of_hap_file, lineage, mode, download_path)

    // Gather every summary into a single emission so the plot is drawn once
    ch_all_summaries    = BUSCO.out.summary.collect()

    // One figure covering all gathered summaries
    BUSCO_PLOT(ch_all_summaries)

    // Only the first versions.yml is kept
    ch_versions         = Channel.empty().mix(BUSCO.out.versions.first())

    emit:
    summary             = BUSCO.out.summary
    plot                = BUSCO_PLOT.out.png
    versions            = ch_versions
}
3 changes: 3 additions & 0 deletions tests/stub/stub.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ params {
ncbi_fcs_gx_tax_id = 12
ncbi_fcs_gx_db_path = 'tests/stub/gxdb/test'

busco_skip = false
busco_lineage_datasets = 'fungi_odb10 hypocreales_odb10'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
Expand Down
24 changes: 24 additions & 0 deletions workflows/assemblyqc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ include { GFF3_VALIDATE } from '../subworkflows/pfr/gff3_val
include { NCBI_FCS_ADAPTOR } from '../modules/local/ncbi_fcs_adaptor'
include { NCBI_FCS_GX } from '../subworkflows/local/ncbi_fcs_gx'
include { ASSEMBLATHON_STATS } from '../modules/local/assemblathon_stats'
include { FASTA_BUSCO_PLOT } from '../subworkflows/local/fasta_busco_plot'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -253,6 +254,29 @@ workflow ASSEMBLYQC {
ch_assemblathon_stats = ASSEMBLATHON_STATS.out.stats
ch_versions = ch_versions.mix(ASSEMBLATHON_STATS.out.versions.first())

// SUBWORKFLOW: FASTA_BUSCO_PLOT
ch_busco_inputs = params.busco_skip
? Channel.empty()
: ch_clean_assembly
| combine(
Channel.of(params.busco_lineage_datasets)
| map { it.split(' ') }
| flatten
)
| map { tag, fa, lineage ->
[ tag, file(fa, checkIfExists: true), lineage ]
}
FASTA_BUSCO_PLOT(
ch_busco_inputs.map { tag, fa, lineage -> [ tag, fa ] },
ch_busco_inputs.map { tag, fa, lineage -> lineage },
params.busco_mode,
params.busco_download_path ?: []
)

ch_busco_summary = FASTA_BUSCO_PLOT.out.summary
ch_busco_plot = FASTA_BUSCO_PLOT.out.plot
ch_versions = ch_versions.mix(FASTA_BUSCO_PLOT.out.versions)

// MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
Expand Down

0 comments on commit 64eebf3

Please sign in to comment.