From fdd8c9579c052bebcbecaa31456015d0d3c49226 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Wed, 17 May 2023 10:41:10 +0000 Subject: [PATCH 1/6] Move preprocessing/assembly/taxonomy to subworkflows --- nextflow.config | 1 + subworkflows/local/assembly.nf | 133 +++++++ subworkflows/local/long_read_preprocess.nf | 68 ++++ subworkflows/local/short_read_preprocess.nf | 144 +++++++ subworkflows/local/short_read_taxonomy.nf | 62 +++ workflows/mag.nf | 414 ++------------------ 6 files changed, 445 insertions(+), 377 deletions(-) create mode 100644 subworkflows/local/assembly.nf create mode 100644 subworkflows/local/long_read_preprocess.nf create mode 100644 subworkflows/local/short_read_preprocess.nf create mode 100644 subworkflows/local/short_read_taxonomy.nf diff --git a/nextflow.config b/nextflow.config index f3705d68..c5c3cf85 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,6 +290,7 @@ profiles { test_ancient_dna { includeConfig 'conf/test_ancient_dna.config' } test_adapterremoval { includeConfig 'conf/test_adapterremoval.config' } test_binrefinement { includeConfig 'conf/test_binrefinement.config' } + test_binning_entry { includeConfig 'conf/test_binning_entry.config' } test_no_clipping { includeConfig 'conf/test_no_clipping.config' } test_bbnorm { includeConfig 'conf/test_bbnorm.config' } } diff --git a/subworkflows/local/assembly.nf b/subworkflows/local/assembly.nf new file mode 100644 index 00000000..c3d5f482 --- /dev/null +++ b/subworkflows/local/assembly.nf @@ -0,0 +1,133 @@ +include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../../modules/local/pool_single_reads' +include { POOL_PAIRED_READS } from '../../modules/local/pool_paired_reads' +include { POOL_SINGLE_READS as POOL_LONG_READS } from '../../modules/local/pool_single_reads' +include { MEGAHIT } from '../../modules/local/megahit' +include { SPADES } from '../../modules/local/spades' +include { SPADESHYBRID } from '../../modules/local/spadeshybrid' + +workflow ASSEMBLY { + take: 
+ short_reads_assembly + long_reads + + main: + ch_versions = Channel.empty() + + // Co-assembly: prepare grouping for MEGAHIT and for pooling for SPAdes + if (params.coassemble_group) { + // short reads + // group and set group as new id + ch_short_reads_grouped = short_reads_assembly + .map { meta, reads -> [ meta.group, meta, reads ] } + .groupTuple(by: 0) + .map { group, metas, reads -> + def assemble_as_single = params.single_end || ( params.bbnorm && params.coassemble_group ) + def meta = [:] + meta.id = "group-$group" + meta.group = group + meta.single_end = assemble_as_single + if ( assemble_as_single ) [ meta, reads.collect { it }, [] ] + else [ meta, reads.collect { it[0] }, reads.collect { it[1] } ] + } + // long reads + // group and set group as new id + ch_long_reads_grouped = long_reads + .map { meta, reads -> [ meta.group, meta, reads ] } + .groupTuple(by: 0) + .map { group, metas, reads -> + def meta = [:] + meta.id = "group-$group" + meta.group = group + [ meta, reads.collect { it } ] + } + } else { + ch_short_reads_grouped = short_reads_assembly + .filter { it[0].single_end } + .map { meta, reads -> [ meta, [ reads ], [] ] } + .mix ( + short_reads_assembly + .filter { ! it[0].single_end } + .map { meta, reads -> [ meta, [ reads[0] ], [ reads[1] ] ] } + ) + ch_long_reads_grouped = long_reads + } + + ch_assemblies = Channel.empty() + if (!params.skip_megahit){ + MEGAHIT ( ch_short_reads_grouped ) + ch_megahit_assemblies = MEGAHIT.out.assembly + .map { meta, assembly -> + def meta_new = meta.clone() + meta_new.assembler = "MEGAHIT" + [ meta_new, assembly ] + } + ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies) + ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) + } + + // Co-assembly: pool reads for SPAdes + if ( ! params.skip_spades || ! 
params.skip_spadeshybrid ){ + if ( params.coassemble_group ) { + if ( params.bbnorm ) { + ch_short_reads_spades = ch_short_reads_grouped.map { [ it[0], it[1] ] } + } else { + POOL_SHORT_SINGLE_READS ( + ch_short_reads_grouped + .filter { it[0].single_end } + ) + POOL_PAIRED_READS ( + ch_short_reads_grouped + .filter { ! it[0].single_end } + ) + ch_short_reads_spades = POOL_SHORT_SINGLE_READS.out.reads + .mix(POOL_PAIRED_READS.out.reads) + } + } else { + ch_short_reads_spades = short_reads_assembly + } + // long reads + if (!params.single_end && !params.skip_spadeshybrid){ + POOL_LONG_READS ( ch_long_reads_grouped ) + ch_long_reads_spades = POOL_LONG_READS.out.reads + } else { + ch_long_reads_spades = Channel.empty() + } + } else { + ch_short_reads_spades = Channel.empty() + ch_long_reads_spades = Channel.empty() + } + + if (!params.single_end && !params.skip_spades){ + SPADES ( ch_short_reads_spades ) + ch_spades_assemblies = SPADES.out.assembly + .map { meta, assembly -> + def meta_new = meta.clone() + meta_new.assembler = "SPAdes" + [ meta_new, assembly ] + } + ch_assemblies = ch_assemblies.mix(ch_spades_assemblies) + ch_versions = ch_versions.mix(SPADES.out.versions.first()) + } + + if (!params.single_end && !params.skip_spadeshybrid){ + ch_short_reads_spades_tmp = ch_short_reads_spades + .map { meta, reads -> [ meta.id, meta, reads ] } + ch_reads_spadeshybrid = ch_long_reads_spades + .map { meta, reads -> [ meta.id, meta, reads ] } + .combine(ch_short_reads_spades_tmp, by: 0) + .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, long_reads, short_reads ] } + SPADESHYBRID ( ch_reads_spadeshybrid ) + ch_spadeshybrid_assemblies = SPADESHYBRID.out.assembly + .map { meta, assembly -> + def meta_new = meta.clone() + meta_new.assembler = "SPAdesHybrid" + [ meta_new, assembly ] + } + ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) + ch_versions = ch_versions.mix(SPADESHYBRID.out.versions.first()) + } + + emit: + assemblies = 
ch_assemblies + versions = ch_versions +} diff --git a/subworkflows/local/long_read_preprocess.nf b/subworkflows/local/long_read_preprocess.nf new file mode 100644 index 00000000..bdfef54e --- /dev/null +++ b/subworkflows/local/long_read_preprocess.nf @@ -0,0 +1,68 @@ +include { PORECHOP } from '../../modules/local/porechop' +include { NANOLYSE } from '../../modules/local/nanolyse' +include { FILTLONG } from '../../modules/local/filtlong' +include { NANOPLOT as NANOPLOT_RAW } from '../../modules/local/nanoplot' +include { NANOPLOT as NANOPLOT_FILTERED } from '../../modules/local/nanoplot' + +workflow LONG_READ_PREPROCESS { + take: + raw_long_reads + short_reads + + main: + ch_versions = Channel.empty() + + // Databases and references + if (!params.keep_lambda) { + ch_nanolyse_db = Channel + .value(file( "${params.lambda_reference}" )) + } + + // Preprocessing + NANOPLOT_RAW ( + raw_long_reads + ) + ch_versions = ch_versions.mix(NANOPLOT_RAW.out.versions.first()) + + ch_long_reads = raw_long_reads + + if (!params.skip_adapter_trimming) { + PORECHOP ( + ch_long_reads + ) + ch_long_reads = PORECHOP.out.reads + ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + } + + if (!params.keep_lambda) { + NANOLYSE ( + ch_long_reads, + ch_nanolyse_db + ) + ch_long_reads = NANOLYSE.out.reads + ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) + } + + // join long and short reads by sample name + ch_short_reads_tmp = short_reads + .map { meta, sr -> [ meta.id, meta, sr ] } + + ch_short_and_long_reads = ch_long_reads + .map { meta, lr -> [ meta.id, meta, lr ] } + .join(ch_short_reads_tmp, by: 0) + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, lr, sr[0], sr[1] ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + + FILTLONG ( + ch_short_and_long_reads + ) + ch_long_reads = FILTLONG.out.reads + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + + NANOPLOT_FILTERED ( + ch_long_reads + ) + + emit: + long_reads 
= ch_long_reads + versions = ch_versions +} diff --git a/subworkflows/local/short_read_preprocess.nf b/subworkflows/local/short_read_preprocess.nf new file mode 100644 index 00000000..eecd63b3 --- /dev/null +++ b/subworkflows/local/short_read_preprocess.nf @@ -0,0 +1,144 @@ +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' + +workflow SHORT_READ_PREPROCESS { + take: + raw_short_reads + + main: + ch_versions = Channel.empty() + ch_short_reads = raw_short_reads + + // Databases and references + if ( params.host_genome ) { + host_fasta = params.genomes[params.host_genome].fasta ?: false + ch_host_fasta = Channel + .value(file( "${host_fasta}" )) + host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false + ch_host_bowtie2index = Channel + .value(file( "${host_bowtie2index}/*" )) + } else if ( params.host_fasta ) { + ch_host_fasta = Channel + .value(file( "${params.host_fasta}" )) + } else { + ch_host_fasta = Channel.empty() + } + + if(!params.keep_phix) { + ch_phix_db_file = Channel + .value(file( "${params.phix_reference}" )) + } + + // MultiQC outputs + ch_multiqc_readprep = 
Channel.empty() + ch_bowtie2_removal_host_multiqc = Channel.empty() + ch_fastqc_trimmed_multiqc = Channel.empty() + + // Preprocessing + if ( !params.skip_clipping ) { + if ( params.clip_tool == 'fastp' ) { + ch_clipmerge_out = FASTP ( + ch_short_reads, + [], + params.fastp_save_trimmed_fail, + [] + ) + ch_short_reads = FASTP.out.reads + ch_multiqc_readprep = FASTP.out.json.collect{it[1]}.ifEmpty([]) + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + } else if ( params.clip_tool == 'adapterremoval' ) { + + // due to strange output file scheme in AR2, have to manually separate + // SE/PE to allow correct pulling of reads after. + ch_adapterremoval_in = ch_short_reads + .branch { + single: it[0]['single_end'] + paired: !it[0]['single_end'] + } + + ADAPTERREMOVAL_PE ( ch_adapterremoval_in.paired, [] ) + ADAPTERREMOVAL_SE ( ch_adapterremoval_in.single, [] ) + + ch_short_reads = Channel.empty() + ch_short_reads = ch_short_reads.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + ch_multiqc_readprep = ch_multiqc_readprep.mix(ADAPTERREMOVAL_PE.out.settings.collect{it[1]}.ifEmpty([]), ADAPTERREMOVAL_SE.out.settings.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + } + } + + if (params.host_fasta){ + BOWTIE2_HOST_REMOVAL_BUILD ( + ch_host_fasta + ) + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + } + + if (params.host_fasta || params.host_genome){ + BOWTIE2_HOST_REMOVAL_ALIGN ( + ch_short_reads, + ch_host_bowtie2index + ) + ch_short_reads = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_bowtie2_removal_host_multiqc = BOWTIE2_HOST_REMOVAL_ALIGN.out.log + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) + } + + if(!params.keep_phix) { + BOWTIE2_PHIX_REMOVAL_BUILD ( + ch_phix_db_file + ) + BOWTIE2_PHIX_REMOVAL_ALIGN ( + ch_short_reads, + BOWTIE2_PHIX_REMOVAL_BUILD.out.index + ) + ch_short_reads = 
BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) + } + + if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { + FASTQC_TRIMMED ( + ch_short_reads + ) + ch_fastqc_trimmed_multiqc = FASTQC_TRIMMED.out.zip.collect{it[1]}.ifEmpty([]) + ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) + } + + if ( params.bbnorm ) { + if ( params.coassemble_group ) { + // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares + // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not + // accepting a mix of single end and pairs. + SEQTK_MERGEPE ( + ch_short_reads.filter { ! it[0].single_end } + ) + ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) + // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). + ch_bbnorm = SEQTK_MERGEPE.out.reads + .mix(ch_short_reads.filter { it[0].single_end }) + .map { [ [ id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true ], it[1] ] } + .groupTuple() + } else { + ch_bbnorm = ch_short_reads + } + BBMAP_BBNORM ( ch_bbnorm ) + ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) + ch_short_reads_bbnorm = BBMAP_BBNORM.out.fastq + } + + emit: + short_reads = ch_short_reads + short_reads_assembly = params.bbnorm ? 
ch_short_reads_bbnorm : ch_short_reads + multiqc_readprep = ch_multiqc_readprep + multiqc_bowtie2_removal_host = ch_bowtie2_removal_host_multiqc + multiqc_fastqc_trimmed = ch_fastqc_trimmed_multiqc + versions = ch_versions +} diff --git a/subworkflows/local/short_read_taxonomy.nf b/subworkflows/local/short_read_taxonomy.nf new file mode 100644 index 00000000..79e4484e --- /dev/null +++ b/subworkflows/local/short_read_taxonomy.nf @@ -0,0 +1,62 @@ +include { CENTRIFUGE_DB_PREPARATION } from '../../modules/local/centrifuge_db_preparation' +include { CENTRIFUGE } from '../../modules/local/centrifuge' +include { KRAKEN2_DB_PREPARATION } from '../../modules/local/kraken2_db_preparation' +include { KRAKEN2 } from '../../modules/local/kraken2' +include { KRONA_DB } from '../../modules/local/krona_db' +include { KRONA } from '../../modules/local/krona' + +workflow SHORT_READ_TAXONOMY { + take: + short_reads + + main: + ch_versions = Channel.empty() + + if(params.centrifuge_db){ + ch_centrifuge_db_file = Channel + .value(file( "${params.centrifuge_db}" )) + } else { + ch_centrifuge_db_file = Channel.empty() + } + + if(params.kraken2_db){ + ch_kraken2_db_file = Channel + .value(file( "${params.kraken2_db}" )) + } else { + ch_kraken2_db_file = Channel.empty() + } + + CENTRIFUGE_DB_PREPARATION ( ch_centrifuge_db_file ) + CENTRIFUGE ( + short_reads, + CENTRIFUGE_DB_PREPARATION.out.db + ) + ch_versions = ch_versions.mix(CENTRIFUGE.out.versions.first()) + + KRAKEN2_DB_PREPARATION ( + ch_kraken2_db_file + ) + KRAKEN2 ( + short_reads, + KRAKEN2_DB_PREPARATION.out.db + ) + ch_versions = ch_versions.mix(KRAKEN2.out.versions.first()) + + if (( params.centrifuge_db || params.kraken2_db ) && !params.skip_krona){ + KRONA_DB () + ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona) + . 
map { classifier, meta, report -> + def meta_new = meta.clone() + meta_new.classifier = classifier + [ meta_new, report ] + } + KRONA ( + ch_tax_classifications, + KRONA_DB.out.db.collect() + ) + ch_versions = ch_versions.mix(KRONA.out.versions.first()) + } + + emit: + versions = ch_versions +} diff --git a/workflows/mag.nf b/workflows/mag.nf index c2c7cacd..c795daed 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -55,27 +55,6 @@ ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.mu // // MODULE: Local to the pipeline // -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { PORECHOP } from '../modules/local/porechop' -include { NANOLYSE } from '../modules/local/nanolyse' -include { FILTLONG } from '../modules/local/filtlong' -include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' -include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' -include { CENTRIFUGE_DB_PREPARATION } from '../modules/local/centrifuge_db_preparation' -include { CENTRIFUGE } from '../modules/local/centrifuge' -include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' -include { KRAKEN2 } from '../modules/local/kraken2' -include { KRONA_DB } from '../modules/local/krona_db' -include { KRONA } from '../modules/local/krona' -include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' -include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' -include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' -include { MEGAHIT } from 
'../modules/local/megahit' -include { SPADES } from '../modules/local/spades' -include { SPADESHYBRID } from '../modules/local/spadeshybrid' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from '../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -90,14 +69,18 @@ include { MULTIQC } from '../modules // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' -include { BUSCO_QC } from '../subworkflows/local/busco_qc' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { SHORT_READ_PREPROCESS } from '../subworkflows/local/short_read_preprocess' +include { LONG_READ_PREPROCESS } from '../subworkflows/local/long_read_preprocess' +include { SHORT_READ_TAXONOMY } from '../subworkflows/local/short_read_taxonomy' +include { ASSEMBLY } from '../subworkflows/local/assembly' +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { BUSCO_QC } from '../subworkflows/local/busco_qc' +include { CHECKM_QC } from '../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../subworkflows/local/gunc_qc' +include { GTDBTK } from '../subworkflows/local/gtdbtk' include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' /* @@ -111,12 +94,6 @@ include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_ // 
include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -125,20 +102,6 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/cust /* -- Create channel for reference databases -- */ //////////////////////////////////////////////////// -if ( params.host_genome ) { - host_fasta = params.genomes[params.host_genome].fasta ?: false - ch_host_fasta = Channel - .value(file( "${host_fasta}" )) - host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false - ch_host_bowtie2index = Channel - .value(file( "${host_bowtie2index}/*" )) -} else if ( params.host_fasta ) { - ch_host_fasta = Channel - .value(file( "${params.host_fasta}" )) -} else { - ch_host_fasta = Channel.empty() -} - if(params.busco_reference){ ch_busco_db_file = Channel .value(file( "${params.busco_reference}" )) @@ -162,20 +125,6 @@ if (params.gunc_db) { ch_gunc_db = Channel.empty() } -if(params.centrifuge_db){ - ch_centrifuge_db_file = Channel - .value(file( "${params.centrifuge_db}" )) -} else { - ch_centrifuge_db_file = Channel.empty() -} - -if(params.kraken2_db){ - ch_kraken2_db_file = Channel - .value(file( "${params.kraken2_db}" )) -} else { - ch_kraken2_db_file = Channel.empty() -} - if(params.cat_db){ ch_cat_db_file = 
Channel .value(file( "${params.cat_db}" )) @@ -183,16 +132,6 @@ if(params.cat_db){ ch_cat_db_file = Channel.empty() } -if(!params.keep_phix) { - ch_phix_db_file = Channel - .value(file( "${params.phix_reference}" )) -} - -if (!params.keep_lambda) { - ch_nanolyse_db = Channel - .value(file( "${params.lambda_reference}" )) -} - gtdb = params.skip_binqc ? false : params.gtdb if (gtdb) { ch_gtdb = Channel @@ -222,7 +161,6 @@ workflow MAG { ch_checkm_db = ARIA2_UNTAR.out.downloaded_file } - // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -242,103 +180,17 @@ workflow MAG { ) ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - ch_bowtie2_removal_host_multiqc = Channel.empty() - if ( !params.assembly_input ) { - if ( !params.skip_clipping ) { - if ( params.clip_tool == 'fastp' ) { - ch_clipmerge_out = FASTP ( - ch_raw_short_reads, - [], - params.fastp_save_trimmed_fail, - [] - ) - ch_short_reads = FASTP.out.reads - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - - } else if ( params.clip_tool == 'adapterremoval' ) { - - // due to strange output file scheme in AR2, have to manually separate - // SE/PE to allow correct pulling of reads after. - ch_adapterremoval_in = ch_raw_short_reads - .branch { - single: it[0]['single_end'] - paired: !it[0]['single_end'] - } - - ADAPTERREMOVAL_PE ( ch_adapterremoval_in.paired, [] ) - ADAPTERREMOVAL_SE ( ch_adapterremoval_in.single, [] ) - - ch_short_reads = Channel.empty() - ch_short_reads = ch_short_reads.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) - - ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + ch_short_reads_preprocess = params.assembly_input ? 
Channel.empty() : ch_raw_short_reads - } - } else { - ch_short_reads = ch_raw_short_reads - } + SHORT_READ_PREPROCESS(ch_short_reads_preprocess) + ch_versions = ch_versions.mix(SHORT_READ_PREPROCESS.out.versions) - if (params.host_fasta){ - BOWTIE2_HOST_REMOVAL_BUILD ( - ch_host_fasta - ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index - } - - if (params.host_fasta || params.host_genome){ - BOWTIE2_HOST_REMOVAL_ALIGN ( - ch_short_reads, - ch_host_bowtie2index - ) - ch_short_reads = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads - ch_bowtie2_removal_host_multiqc = BOWTIE2_HOST_REMOVAL_ALIGN.out.log - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - } - - if(!params.keep_phix) { - BOWTIE2_PHIX_REMOVAL_BUILD ( - ch_phix_db_file - ) - BOWTIE2_PHIX_REMOVAL_ALIGN ( - ch_short_reads, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index - ) - ch_short_reads = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - } - - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - FASTQC_TRIMMED ( - ch_short_reads - ) - ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - } - - if ( params.bbnorm ) { - if ( params.coassemble_group ) { - // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares - // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not - // accepting a mix of single end and pairs. - SEQTK_MERGEPE ( - ch_short_reads.filter { ! it[0].single_end } - ) - ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) - // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). 
- ch_bbnorm = SEQTK_MERGEPE.out.reads - .mix(ch_short_reads.filter { it[0].single_end }) - .map { [ [ id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true ], it[1] ] } - .groupTuple() - } else { - ch_bbnorm = ch_short_reads - } - BBMAP_BBNORM ( ch_bbnorm ) - ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) - ch_short_reads_assembly = BBMAP_BBNORM.out.fastq - } else { - ch_short_reads_assembly = ch_short_reads - } + if ( !params.assembly_input ) { + ch_short_reads = SHORT_READ_PREPROCESS.out.short_reads + ch_short_reads_assembly = SHORT_READ_PREPROCESS.out.short_reads_assembly } else { - ch_short_reads = ch_raw_short_reads + ch_short_reads = ch_raw_short_reads + ch_short_reads_assembly = Channel.empty() } /* @@ -346,86 +198,25 @@ workflow MAG { Preprocessing and QC for long reads ================================================================================ */ - NANOPLOT_RAW ( - ch_raw_long_reads - ) - ch_versions = ch_versions.mix(NANOPLOT_RAW.out.versions.first()) - - ch_long_reads = ch_raw_long_reads - - if ( !params.assembly_input ) { - if (!params.skip_adapter_trimming) { - PORECHOP ( - ch_raw_long_reads - ) - ch_long_reads = PORECHOP.out.reads - ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) - } - - if (!params.keep_lambda) { - NANOLYSE ( - ch_long_reads, - ch_nanolyse_db - ) - ch_long_reads = NANOLYSE.out.reads - ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) - } - // join long and short reads by sample name - ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } + ch_long_reads_preprocess = params.assembly_input ? 
Channel.empty() : ch_raw_long_reads - ch_short_and_long_reads = ch_long_reads - .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, lr, sr[0], sr[1] ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + LONG_READ_PREPROCESS(ch_long_reads_preprocess, ch_short_reads) + ch_versions = ch_versions.mix(LONG_READ_PREPROCESS.out.versions) - FILTLONG ( - ch_short_and_long_reads - ) - ch_long_reads = FILTLONG.out.reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - - NANOPLOT_FILTERED ( - ch_long_reads - ) + if ( !params.assembly_input ) { + ch_long_reads = LONG_READ_PREPROCESS.out.long_reads + } else { + ch_long_reads = Channel.empty() } - /* ================================================================================ Taxonomic information ================================================================================ */ - CENTRIFUGE_DB_PREPARATION ( ch_centrifuge_db_file ) - CENTRIFUGE ( - ch_short_reads, - CENTRIFUGE_DB_PREPARATION.out.db - ) - ch_versions = ch_versions.mix(CENTRIFUGE.out.versions.first()) - KRAKEN2_DB_PREPARATION ( - ch_kraken2_db_file - ) - KRAKEN2 ( - ch_short_reads, - KRAKEN2_DB_PREPARATION.out.db - ) - ch_versions = ch_versions.mix(KRAKEN2.out.versions.first()) - - if (( params.centrifuge_db || params.kraken2_db ) && !params.skip_krona){ - KRONA_DB () - ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona) - . 
map { classifier, meta, report -> - def meta_new = meta.clone() - meta_new.classifier = classifier - [ meta_new, report ] - } - KRONA ( - ch_tax_classifications, - KRONA_DB.out.db.collect() - ) - ch_versions = ch_versions.mix(KRONA.out.versions.first()) - } + SHORT_READ_TAXONOMY(ch_short_reads) + ch_versions = ch_versions.mix(SHORT_READ_TAXONOMY.out.versions) /* ================================================================================ @@ -433,129 +224,16 @@ workflow MAG { ================================================================================ */ - if ( !params.assembly_input ) { - // Co-assembly: prepare grouping for MEGAHIT and for pooling for SPAdes - if (params.coassemble_group) { - // short reads - // group and set group as new id - ch_short_reads_grouped = ch_short_reads_assembly - .map { meta, reads -> [ meta.group, meta, reads ] } - .groupTuple(by: 0) - .map { group, metas, reads -> - def assemble_as_single = params.single_end || ( params.bbnorm && params.coassemble_group ) - def meta = [:] - meta.id = "group-$group" - meta.group = group - meta.single_end = assemble_as_single - if ( assemble_as_single ) [ meta, reads.collect { it }, [] ] - else [ meta, reads.collect { it[0] }, reads.collect { it[1] } ] - } - // long reads - // group and set group as new id - ch_long_reads_grouped = ch_long_reads - .map { meta, reads -> [ meta.group, meta, reads ] } - .groupTuple(by: 0) - .map { group, metas, reads -> - def meta = [:] - meta.id = "group-$group" - meta.group = group - [ meta, reads.collect { it } ] - } - } else { - ch_short_reads_grouped = ch_short_reads_assembly - .filter { it[0].single_end } - .map { meta, reads -> [ meta, [ reads ], [] ] } - .mix ( - ch_short_reads_assembly - .filter { ! 
it[0].single_end } - .map { meta, reads -> [ meta, [ reads[0] ], [ reads[1] ] ] } - ) - ch_long_reads_grouped = ch_long_reads - } - - ch_assemblies = Channel.empty() - if (!params.skip_megahit){ - MEGAHIT ( ch_short_reads_grouped ) - ch_megahit_assemblies = MEGAHIT.out.assembly - .map { meta, assembly -> - def meta_new = meta.clone() - meta_new.assembler = "MEGAHIT" - [ meta_new, assembly ] - } - ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies) - ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) - } - - // Co-assembly: pool reads for SPAdes - if ( ! params.skip_spades || ! params.skip_spadeshybrid ){ - if ( params.coassemble_group ) { - if ( params.bbnorm ) { - ch_short_reads_spades = ch_short_reads_grouped.map { [ it[0], it[1] ] } - } else { - POOL_SHORT_SINGLE_READS ( - ch_short_reads_grouped - .filter { it[0].single_end } - ) - POOL_PAIRED_READS ( - ch_short_reads_grouped - .filter { ! it[0].single_end } - ) - ch_short_reads_spades = POOL_SHORT_SINGLE_READS.out.reads - .mix(POOL_PAIRED_READS.out.reads) - } - } else { - ch_short_reads_spades = ch_short_reads_assembly - } - // long reads - if (!params.single_end && !params.skip_spadeshybrid){ - POOL_LONG_READS ( ch_long_reads_grouped ) - ch_long_reads_spades = POOL_LONG_READS.out.reads - } else { - ch_long_reads_spades = Channel.empty() - } - } else { - ch_short_reads_spades = Channel.empty() - ch_long_reads_spades = Channel.empty() - } - - if (!params.single_end && !params.skip_spades){ - SPADES ( ch_short_reads_spades ) - ch_spades_assemblies = SPADES.out.assembly - .map { meta, assembly -> - def meta_new = meta.clone() - meta_new.assembler = "SPAdes" - [ meta_new, assembly ] - } - ch_assemblies = ch_assemblies.mix(ch_spades_assemblies) - ch_versions = ch_versions.mix(SPADES.out.versions.first()) - } - - if (!params.single_end && !params.skip_spadeshybrid){ - ch_short_reads_spades_tmp = ch_short_reads_spades - .map { meta, reads -> [ meta.id, meta, reads ] } - ch_reads_spadeshybrid = 
ch_long_reads_spades - .map { meta, reads -> [ meta.id, meta, reads ] } - .combine(ch_short_reads_spades_tmp, by: 0) - .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, long_reads, short_reads ] } - SPADESHYBRID ( ch_reads_spadeshybrid ) - ch_spadeshybrid_assemblies = SPADESHYBRID.out.assembly - .map { meta, assembly -> - def meta_new = meta.clone() - meta_new.assembler = "SPAdesHybrid" - [ meta_new, assembly ] - } - ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) - ch_versions = ch_versions.mix(SPADESHYBRID.out.versions.first()) - } - } else { - ch_assemblies = ch_input_assemblies - } + ch_assemblies = Channel.empty() + ASSEMBLY(ch_short_reads_assembly, ch_long_reads) + ch_assemblies = ch_assemblies.mix(ch_input_assemblies, ASSEMBLY.out.assemblies) + ch_versions = ch_versions.mix(ASSEMBLY.out.versions) ch_quast_multiqc = Channel.empty() if (!params.skip_quast){ QUAST ( ch_assemblies ) ch_quast_multiqc = QUAST.out.qc - ch_versions = ch_versions.mix(QUAST.out.versions.first()) + ch_versions = ch_versions.mix(QUAST.out.versions.first()) } /* @@ -824,34 +502,16 @@ workflow MAG { ) */ - ch_multiqc_readprep = Channel.empty() - if (!params.assembly_input) { - if (!params.skip_clipping) { - if ( params.clip_tool == "fastp") { - ch_multiqc_readprep = ch_multiqc_readprep.mix(FASTP.out.json.collect{it[1]}.ifEmpty([])) - } else if ( params.clip_tool == "adapterremoval" ) { - ch_multiqc_readprep = ch_multiqc_readprep.mix(ADAPTERREMOVAL_PE.out.settings.collect{it[1]}.ifEmpty([]), ADAPTERREMOVAL_SE.out.settings.collect{it[1]}.ifEmpty([])) - } - } - } - - ch_fastqc_trimmed_multiqc = Channel.empty() - if (!params.assembly_input) { - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - ch_fastqc_trimmed_multiqc = FASTQC_TRIMMED.out.zip.collect{it[1]}.ifEmpty([]) - } - } - MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_custom_config.collect().ifEmpty([]), 
FASTQC_RAW.out.zip.collect{it[1]}.ifEmpty([]), - ch_fastqc_trimmed_multiqc.collect().ifEmpty([]), - ch_bowtie2_removal_host_multiqc.collect{it[1]}.ifEmpty([]), + SHORT_READ_PREPROCESS.out.multiqc_fastqc_trimmed.collect().ifEmpty([]), + SHORT_READ_PREPROCESS.out.multiqc_bowtie2_removal_host.collect{it[1]}.ifEmpty([]), ch_quast_multiqc.collect().ifEmpty([]), ch_bowtie2_assembly_multiqc.collect().ifEmpty([]), ch_busco_multiqc.collect().ifEmpty([]), - ch_multiqc_readprep.collect().ifEmpty([]) + SHORT_READ_PREPROCESS.out.multiqc_readprep.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() } From 74fea37acc49b8e82778e99c32b4e3fa57e7778e Mon Sep 17 00:00:00 2001 From: prototaxites Date: Wed, 17 May 2023 15:25:22 +0000 Subject: [PATCH 2/6] Create annotation, assembly qc, bin qc, and bin taxonomy subworkflows. --- subworkflows/local/annotation.nf | 45 ++++ subworkflows/local/assembly.nf | 16 +- subworkflows/local/assembly_qc.nf | 20 ++ subworkflows/local/bin_qc.nf | 109 ++++++++ subworkflows/local/bin_taxonomy.nf | 74 ++++++ subworkflows/local/short_read_preprocess.nf | 4 +- workflows/mag.nf | 262 ++++---------------- 7 files changed, 300 insertions(+), 230 deletions(-) create mode 100644 subworkflows/local/annotation.nf create mode 100644 subworkflows/local/assembly_qc.nf create mode 100644 subworkflows/local/bin_qc.nf create mode 100644 subworkflows/local/bin_taxonomy.nf diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf new file mode 100644 index 00000000..aa301ca2 --- /dev/null +++ b/subworkflows/local/annotation.nf @@ -0,0 +1,45 @@ +include { PROKKA } from '../../modules/nf-core/prokka/main' +include { PRODIGAL } from '../../modules/nf-core/prodigal/main' + +workflow ANNOTATION { + take: + bins_unbins + assemblies + + main: + ch_versions = Channel.empty() + + + /* + Prodigal: Predict proteins + */ + if (!params.skip_prodigal){ + PRODIGAL ( + assemblies, + 'gff' + ) + ch_versions = 
ch_versions.mix(PRODIGAL.out.versions.first()) + } + + /* + * Prokka: Genome annotation + */ + ch_bins_for_prokka = bins_unbins.transpose() + .map { meta, bin -> + def meta_new = meta.clone() + meta_new.id = bin.getBaseName() + [ meta_new, bin ] + } + + if (!params.skip_prokka){ + PROKKA ( + ch_bins_for_prokka, + [], + [] + ) + ch_versions = ch_versions.mix(PROKKA.out.versions.first()) + } + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/assembly.nf b/subworkflows/local/assembly.nf index c3d5f482..de1daafe 100644 --- a/subworkflows/local/assembly.nf +++ b/subworkflows/local/assembly.nf @@ -1,9 +1,9 @@ -include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../../modules/local/pool_single_reads' -include { POOL_PAIRED_READS } from '../../modules/local/pool_paired_reads' -include { POOL_SINGLE_READS as POOL_LONG_READS } from '../../modules/local/pool_single_reads' -include { MEGAHIT } from '../../modules/local/megahit' -include { SPADES } from '../../modules/local/spades' -include { SPADESHYBRID } from '../../modules/local/spadeshybrid' +include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../../modules/local/pool_single_reads' +include { POOL_PAIRED_READS } from '../../modules/local/pool_paired_reads' +include { POOL_SINGLE_READS as POOL_LONG_READS } from '../../modules/local/pool_single_reads' +include { MEGAHIT } from '../../modules/local/megahit' +include { SPADES } from '../../modules/local/spades' +include { SPADESHYBRID } from '../../modules/local/spadeshybrid' workflow ASSEMBLY { take: @@ -128,6 +128,6 @@ workflow ASSEMBLY { } emit: - assemblies = ch_assemblies - versions = ch_versions + assemblies = ch_assemblies + versions = ch_versions } diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf new file mode 100644 index 00000000..ae21625a --- /dev/null +++ b/subworkflows/local/assembly_qc.nf @@ -0,0 +1,20 @@ +include { QUAST } from '../../modules/local/quast' + +workflow ASSEMBLY_QC { + take: 
+ assemblies + + main: + ch_versions = Channel.empty() + ch_quast_multiqc = Channel.empty() + + if (!params.skip_quast){ + QUAST ( assemblies ) + ch_quast_multiqc = QUAST.out.qc + ch_versions = ch_versions.mix(QUAST.out.versions.first()) + } + + emit: + quast_multiqc = ch_quast_multiqc + versions = ch_versions +} diff --git a/subworkflows/local/bin_qc.nf b/subworkflows/local/bin_qc.nf new file mode 100644 index 00000000..a7dfed99 --- /dev/null +++ b/subworkflows/local/bin_qc.nf @@ -0,0 +1,109 @@ +include { ARIA2 as ARIA2_UNTAR } from '../../modules/nf-core/aria2/main' +include { BUSCO_QC } from '../../subworkflows/local/busco_qc' +include { CHECKM_QC } from '../../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../../subworkflows/local/gunc_qc' +include { QUAST_BINS } from '../../modules/local/quast_bins' +include { QUAST_BINS_SUMMARY } from '../../modules/local/quast_bins_summary' + +workflow BIN_QC { + take: + bins_unbins + + main: + if(params.busco_reference){ + ch_busco_db_file = Channel + .value(file( "${params.busco_reference}" )) + } else { + ch_busco_db_file = Channel.empty() + } + if (params.busco_download_path) { + ch_busco_download_folder = Channel + .value(file( "${params.busco_download_path}" )) + } else { + ch_busco_download_folder = Channel.empty() + } + + if(params.checkm_db) { + ch_checkm_db = file(params.checkm_db, checkIfExists: true) + } + + // Get checkM database if not supplied + if ( !params.skip_binqc && params.binqc_tool == 'checkm' && !params.checkm_db ) { + ARIA2_UNTAR (params.checkm_download_url) + ch_checkm_db = ARIA2_UNTAR.out.downloaded_file + } + + if (params.gunc_db) { + ch_gunc_db = file(params.gunc_db, checkIfExists: true) + } else { + ch_gunc_db = Channel.empty() + } + + ch_versions = Channel.empty() + + ch_busco_multiqc = Channel.empty() + ch_busco_summary = Channel.empty() + ch_checkm_summary = Channel.empty() + ch_quast_bins_summary = Channel.empty() + + bins_unbins_transposed = bins_unbins.transpose() + + if 
(!params.skip_binqc && params.binqc_tool == 'busco'){ + /* + * BUSCO subworkflow: Quantitative measures for the assessment of genome assembly + */ + BUSCO_QC ( + ch_busco_db_file, + ch_busco_download_folder, + bins_unbins_transposed + ) + ch_busco_summary = BUSCO_QC.out.summary + ch_busco_multiqc = BUSCO_QC.out.multiqc + ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first()) + } + + if (!params.skip_binqc && params.binqc_tool == 'checkm'){ + /* + * CheckM subworkflow: Quantitative measures for the assessment of genome assembly + */ + CHECKM_QC ( + bins_unbins_transposed.groupTuple(), + ch_checkm_db + ) + ch_checkm_summary = CHECKM_QC.out.summary + + // TODO custom output parsing? Add to MultiQC? + ch_versions = ch_versions.mix(CHECKM_QC.out.versions) + + } + + if ( params.run_gunc && params.binqc_tool == 'checkm' ) { + GUNC_QC ( bins_unbins_transposed, ch_gunc_db, CHECKM_QC.out.checkm_tsv ) + ch_versions = ch_versions.mix( GUNC_QC.out.versions ) + } else if ( params.run_gunc ) { + GUNC_QC ( bins_unbins_transposed, ch_gunc_db, [] ) + ch_versions = ch_versions.mix( GUNC_QC.out.versions ) + } + + if (!params.skip_quast){ + ch_input_for_quast_bins = bins_unbins + .groupTuple() + .map{ + meta, reads -> + def new_reads = reads.flatten() + [meta, new_reads] + } + QUAST_BINS ( ch_input_for_quast_bins ) + ch_versions = ch_versions.mix(QUAST_BINS.out.versions.first()) + QUAST_BINS_SUMMARY ( QUAST_BINS.out.quast_bin_summaries.collect() ) + ch_quast_bins_summary = QUAST_BINS_SUMMARY.out.summary + } + + emit: + busco_summary = ch_busco_summary + busco_multiqc = ch_busco_multiqc + busco_failed_bins = BUSCO_QC.out.failed_bin + checkm_summary = ch_checkm_summary + quast_bins_summary = ch_quast_bins_summary + versions = ch_versions +} diff --git a/subworkflows/local/bin_taxonomy.nf b/subworkflows/local/bin_taxonomy.nf new file mode 100644 index 00000000..fbedb9a9 --- /dev/null +++ b/subworkflows/local/bin_taxonomy.nf @@ -0,0 +1,74 @@ +include { CAT_DB } from 
'../../modules/local/cat_db'
+include { CAT_DB_GENERATE } from '../../modules/local/cat_db_generate'
+include { CAT } from '../../modules/local/cat'
+include { CAT_SUMMARY } from "../../modules/local/cat_summary"
+include { GTDBTK } from '../../subworkflows/local/gtdbtk'
+
+workflow BIN_TAXONOMY {
+    take:
+    bins_unbins
+    busco_summary
+    checkm_summary
+
+    main:
+    ch_versions = Channel.empty()
+
+    if(params.cat_db){
+        ch_cat_db_file = Channel
+            .value(file( "${params.cat_db}" ))
+    } else {
+        ch_cat_db_file = Channel.empty()
+    }
+
+    gtdb = params.skip_binqc ? false : params.gtdb
+    if (gtdb) {
+        ch_gtdb = Channel
+            .value(file( "${gtdb}" ))
+    } else {
+        ch_gtdb = Channel.empty()
+    }
+
+    ch_cat_db = Channel.empty()
+    if (params.cat_db){
+        CAT_DB ( ch_cat_db_file )
+        ch_cat_db = CAT_DB.out.db
+    } else if (params.cat_db_generate){
+        CAT_DB_GENERATE ()
+        ch_cat_db = CAT_DB_GENERATE.out.db
+    }
+
+    bins_unbins_transposed = bins_unbins.transpose()
+
+    /*
+     * CAT: Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins)
+     */
+
+    CAT (
+        bins_unbins_transposed,
+        ch_cat_db
+    )
+    CAT_SUMMARY(
+        CAT.out.tax_classification.collect()
+    )
+    ch_versions = ch_versions.mix(CAT.out.versions.first())
+    ch_versions = ch_versions.mix(CAT_SUMMARY.out.versions)
+
+    /*
+     * GTDB-tk: taxonomic classifications using GTDB reference
+     */
+    ch_gtdbtk_summary = Channel.empty()
+    if ( gtdb ){
+        GTDBTK (
+            bins_unbins_transposed,
+            busco_summary,
+            checkm_summary,
+            ch_gtdb
+        )
+        ch_versions = ch_versions.mix(GTDBTK.out.versions.first())
+        ch_gtdbtk_summary = GTDBTK.out.summary
+    }
+
+    emit:
+    gtdbtk_summary = ch_gtdbtk_summary
+    versions = ch_versions
+}
diff --git a/subworkflows/local/short_read_preprocess.nf b/subworkflows/local/short_read_preprocess.nf
index eecd63b3..f5dd0e68 100644
--- a/subworkflows/local/short_read_preprocess.nf
+++ b/subworkflows/local/short_read_preprocess.nf
@@ -75,7 +75,7 @@ workflow 
SHORT_READ_PREPROCESS { } } - if (params.host_fasta){ + if (params.host_fasta && !params.assembly_input){ BOWTIE2_HOST_REMOVAL_BUILD ( ch_host_fasta ) @@ -92,7 +92,7 @@ workflow SHORT_READ_PREPROCESS { ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) } - if(!params.keep_phix) { + if(!params.keep_phix && !params.assembly_input) { BOWTIE2_PHIX_REMOVAL_BUILD ( ch_phix_db_file ) diff --git a/workflows/mag.nf b/workflows/mag.nf index c795daed..e25797d9 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -52,36 +52,22 @@ ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.mu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Local to the pipeline -// -include { QUAST } from '../modules/local/quast' -include { QUAST_BINS } from '../modules/local/quast_bins' -include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' -include { CAT_DB } from '../modules/local/cat_db' -include { CAT_DB_GENERATE } from '../modules/local/cat_db_generate' -include { CAT } from '../modules/local/cat' -include { CAT_SUMMARY } from "../modules/local/cat_summary" -include { BIN_SUMMARY } from '../modules/local/bin_summary' -include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' -include { MULTIQC } from '../modules/local/multiqc' - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { SHORT_READ_PREPROCESS } from '../subworkflows/local/short_read_preprocess' -include { LONG_READ_PREPROCESS } from '../subworkflows/local/long_read_preprocess' -include { SHORT_READ_TAXONOMY } from '../subworkflows/local/short_read_taxonomy' -include { ASSEMBLY } from '../subworkflows/local/assembly' -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from 
'../subworkflows/local/binning_refinement' -include { BUSCO_QC } from '../subworkflows/local/busco_qc' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' -include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { BIN_SUMMARY } from '../modules/local/bin_summary' +include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' +include { MULTIQC } from '../modules/local/multiqc' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { SHORT_READ_PREPROCESS } from '../subworkflows/local/short_read_preprocess' +include { LONG_READ_PREPROCESS } from '../subworkflows/local/long_read_preprocess' +include { SHORT_READ_TAXONOMY } from '../subworkflows/local/short_read_taxonomy' +include { ASSEMBLY } from '../subworkflows/local/assembly' +include { ASSEMBLY_QC } from '../subworkflows/local/assembly_qc' +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { BIN_QC } from '../subworkflows/local/bin_qc' +include { BIN_TAXONOMY } from '../subworkflows/local/bin_taxonomy' +include { ANNOTATION } from '../subworkflows/local/annotation' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,57 +75,9 @@ include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// -include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { 
PRODIGAL } from '../modules/nf-core/prodigal/main' -include { PROKKA } from '../modules/nf-core/prokka/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -//////////////////////////////////////////////////// -/* -- Create channel for reference databases -- */ -//////////////////////////////////////////////////// - -if(params.busco_reference){ - ch_busco_db_file = Channel - .value(file( "${params.busco_reference}" )) -} else { - ch_busco_db_file = Channel.empty() -} -if (params.busco_download_path) { - ch_busco_download_folder = Channel - .value(file( "${params.busco_download_path}" )) -} else { - ch_busco_download_folder = Channel.empty() -} - -if(params.checkm_db) { - ch_checkm_db = file(params.checkm_db, checkIfExists: true) -} - -if (params.gunc_db) { - ch_gunc_db = file(params.gunc_db, checkIfExists: true) -} else { - ch_gunc_db = Channel.empty() -} - -if(params.cat_db){ - ch_cat_db_file = Channel - .value(file( "${params.cat_db}" )) -} else { - ch_cat_db_file = Channel.empty() -} - -gtdb = params.skip_binqc ? 
false : params.gtdb -if (gtdb) { - ch_gtdb = Channel - .value(file( "${gtdb}" )) -} else { - ch_gtdb = Channel.empty() -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -154,13 +92,6 @@ workflow MAG { ch_versions = Channel.empty() - // Get checkM database if not supplied - - if ( !params.skip_binqc && params.binqc_tool == 'checkm' && !params.checkm_db ) { - ARIA2_UNTAR (params.checkm_download_url) - ch_checkm_db = ARIA2_UNTAR.out.downloaded_file - } - // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -225,30 +156,12 @@ workflow MAG { */ ch_assemblies = Channel.empty() - ASSEMBLY(ch_short_reads_assembly, ch_long_reads) + ASSEMBLY ( ch_short_reads_assembly, ch_long_reads ) ch_assemblies = ch_assemblies.mix(ch_input_assemblies, ASSEMBLY.out.assemblies) ch_versions = ch_versions.mix(ASSEMBLY.out.versions) - ch_quast_multiqc = Channel.empty() - if (!params.skip_quast){ - QUAST ( ch_assemblies ) - ch_quast_multiqc = QUAST.out.qc - ch_versions = ch_versions.mix(QUAST.out.versions.first()) - } - - /* - ================================================================================ - Predict proteins - ================================================================================ - */ - - if (!params.skip_prodigal){ - PRODIGAL ( - ch_assemblies, - 'gff' - ) - ch_versions = ch_versions.mix(PRODIGAL.out.versions.first()) - } + ASSEMBLY_QC ( ch_assemblies ) + ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) /* ================================================================================ @@ -256,14 +169,11 @@ workflow MAG { ================================================================================ */ - ch_bowtie2_assembly_multiqc = Channel.empty() ch_busco_summary = Channel.empty() ch_checkm_summary = Channel.empty() ch_busco_multiqc = Channel.empty() - - BINNING_PREPARATION ( ch_assemblies, ch_short_reads @@ -341,129 +251,41 @@ workflow MAG { * Bin QC subworkflows: 
for checking bin completeness with either BUSCO, CHECKM, and/or GUNC */ - // Results in: [ [meta], path_to_bin.fa ] - ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose() + BIN_QC(ch_input_for_postbinning_bins_unbins) + ch_versions = ch_versions.mix(BIN_QC.out.versions) + // process information if BUSCO analysis failed for individual bins due to no matching genes if (!params.skip_binqc && params.binqc_tool == 'busco'){ - /* - * BUSCO subworkflow: Quantitative measures for the assessment of genome assembly - */ - BUSCO_QC ( - ch_busco_db_file, - ch_busco_download_folder, - ch_input_bins_for_qc - ) - ch_busco_summary = BUSCO_QC.out.summary - ch_busco_multiqc = BUSCO_QC.out.multiqc - ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first()) - // process information if BUSCO analysis failed for individual bins due to no matching genes - BUSCO_QC.out - .failed_bin + BIN_QC.out + .busco_failed_bins .splitCsv(sep: '\t') .map { bin, error -> if (!bin.contains(".unbinned.")) busco_failed_bins[bin] = error } } + + /* + * Bin taxonomy subworkflows: for assigning taxonomy to bins with either GTDB-Tk or CAT + */ - if (!params.skip_binqc && params.binqc_tool == 'checkm'){ - /* - * CheckM subworkflow: Quantitative measures for the assessment of genome assembly - */ - CHECKM_QC ( - ch_input_bins_for_qc.groupTuple(), - ch_checkm_db - ) - ch_checkm_summary = CHECKM_QC.out.summary - - // TODO custom output parsing? Add to MultiQC? 
- ch_versions = ch_versions.mix(CHECKM_QC.out.versions) - - } - - if ( params.run_gunc && params.binqc_tool == 'checkm' ) { - GUNC_QC ( ch_input_bins_for_qc, ch_gunc_db, CHECKM_QC.out.checkm_tsv ) - ch_versions = ch_versions.mix( GUNC_QC.out.versions ) - } else if ( params.run_gunc ) { - GUNC_QC ( ch_input_bins_for_qc, ch_gunc_db, [] ) - ch_versions = ch_versions.mix( GUNC_QC.out.versions ) - } - - ch_quast_bins_summary = Channel.empty() - if (!params.skip_quast){ - ch_input_for_quast_bins = ch_input_for_postbinning_bins_unbins - .groupTuple() - .map{ - meta, reads -> - def new_reads = reads.flatten() - [meta, new_reads] - } - QUAST_BINS ( ch_input_for_quast_bins ) - ch_versions = ch_versions.mix(QUAST_BINS.out.versions.first()) - QUAST_BINS_SUMMARY ( QUAST_BINS.out.quast_bin_summaries.collect() ) - ch_quast_bins_summary = QUAST_BINS_SUMMARY.out.summary - } + BIN_TAXONOMY(ch_input_for_postbinning_bins_unbins, BIN_QC.out.busco_summary, BIN_QC.out.checkm_summary) + ch_versions = ch_versions.mix(BIN_TAXONOMY.out.versions) /* - * CAT: Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) - */ - ch_cat_db = Channel.empty() - if (params.cat_db){ - CAT_DB ( ch_cat_db_file ) - ch_cat_db = CAT_DB.out.db - } else if (params.cat_db_generate){ - CAT_DB_GENERATE () - ch_cat_db = CAT_DB_GENERATE.out.db - } - CAT ( - ch_input_for_postbinning_bins_unbins, - ch_cat_db - ) - CAT_SUMMARY( - CAT.out.tax_classification.collect() - ) - ch_versions = ch_versions.mix(CAT.out.versions.first()) - ch_versions = ch_versions.mix(CAT_SUMMARY.out.versions) + * Annotation subworkflows: Prodigal and Prokka + */ + ANNOTATION(ch_input_for_postbinning_bins_unbins, ch_assemblies) + ch_versions = ch_versions.mix(ANNOTATION.out.versions) /* - * GTDB-tk: taxonomic classifications using GTDB reference - */ - ch_gtdbtk_summary = Channel.empty() - if ( gtdb ){ - GTDBTK ( - ch_input_for_postbinning_bins_unbins, - 
ch_busco_summary, - ch_checkm_summary, - ch_gtdb - ) - ch_versions = ch_versions.mix(GTDBTK.out.versions.first()) - ch_gtdbtk_summary = GTDBTK.out.summary - } - + * Bin summary + */ if ( ( !params.skip_binqc ) || !params.skip_quast || gtdb){ BIN_SUMMARY ( ch_input_for_binsummary, - ch_busco_summary.ifEmpty([]), - ch_checkm_summary.ifEmpty([]), - ch_quast_bins_summary.ifEmpty([]), - ch_gtdbtk_summary.ifEmpty([]) - ) - } - - /* - * Prokka: Genome annotation - */ - ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins.transpose() - .map { meta, bin -> - def meta_new = meta.clone() - meta_new.id = bin.getBaseName() - [ meta_new, bin ] - } - - if (!params.skip_prokka){ - PROKKA ( - ch_bins_for_prokka, - [], - [] + BIN_QC.out.busco_summary.ifEmpty([]), + BIN_QC.out.checkm_summary.ifEmpty([]), + BIN_QC.out.quast_bins_summary.ifEmpty([]), + BIN_TAXONOMY.out.gtdbtk_summary.ifEmpty([]) ) - ch_versions = ch_versions.mix(PROKKA.out.versions.first()) } } @@ -508,9 +330,9 @@ workflow MAG { FASTQC_RAW.out.zip.collect{it[1]}.ifEmpty([]), SHORT_READ_PREPROCESS.out.multiqc_fastqc_trimmed.collect().ifEmpty([]), SHORT_READ_PREPROCESS.out.multiqc_bowtie2_removal_host.collect{it[1]}.ifEmpty([]), - ch_quast_multiqc.collect().ifEmpty([]), + ASSEMBLY_QC.out.quast_multiqc.collect().ifEmpty([]), ch_bowtie2_assembly_multiqc.collect().ifEmpty([]), - ch_busco_multiqc.collect().ifEmpty([]), + BIN_QC.out.busco_multiqc.collect().ifEmpty([]), SHORT_READ_PREPROCESS.out.multiqc_readprep.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() From 8fb771a2e20e91c544483b9f69c2c44725ae3bbe Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Wed, 17 May 2023 16:32:29 +0100 Subject: [PATCH 3/6] Update nextflow.config --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c5c3cf85..f3705d68 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,7 +290,6 @@ profiles { test_ancient_dna { 
includeConfig 'conf/test_ancient_dna.config' } test_adapterremoval { includeConfig 'conf/test_adapterremoval.config' } test_binrefinement { includeConfig 'conf/test_binrefinement.config' } - test_binning_entry { includeConfig 'conf/test_binning_entry.config' } test_no_clipping { includeConfig 'conf/test_no_clipping.config' } test_bbnorm { includeConfig 'conf/test_bbnorm.config' } } From 47ce949e4097b0c8809cf9b1e88026067ba1cba2 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Thu, 18 May 2023 08:47:54 +0000 Subject: [PATCH 4/6] Tidy code --- subworkflows/local/bin_qc.nf | 12 ++++++------ workflows/mag.nf | 29 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/bin_qc.nf b/subworkflows/local/bin_qc.nf index a7dfed99..70bc05e8 100644 --- a/subworkflows/local/bin_qc.nf +++ b/subworkflows/local/bin_qc.nf @@ -39,12 +39,12 @@ workflow BIN_QC { ch_gunc_db = Channel.empty() } - ch_versions = Channel.empty() - - ch_busco_multiqc = Channel.empty() - ch_busco_summary = Channel.empty() - ch_checkm_summary = Channel.empty() + ch_versions = Channel.empty() + ch_busco_multiqc = Channel.empty() + ch_busco_summary = Channel.empty() + ch_checkm_summary = Channel.empty() ch_quast_bins_summary = Channel.empty() + ch_busco_failed_bins = Channel.empty() bins_unbins_transposed = bins_unbins.transpose() @@ -102,7 +102,7 @@ workflow BIN_QC { emit: busco_summary = ch_busco_summary busco_multiqc = ch_busco_multiqc - busco_failed_bins = BUSCO_QC.out.failed_bin + busco_failed_bins = ch_busco_failed_bins checkm_summary = ch_checkm_summary quast_bins_summary = ch_quast_bins_summary versions = ch_versions diff --git a/workflows/mag.nf b/workflows/mag.nf index e25797d9..ea3286b9 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -113,7 +113,7 @@ workflow MAG { ch_short_reads_preprocess = params.assembly_input ? 
Channel.empty() : ch_raw_short_reads - SHORT_READ_PREPROCESS(ch_short_reads_preprocess) + SHORT_READ_PREPROCESS ( ch_short_reads_preprocess ) ch_versions = ch_versions.mix(SHORT_READ_PREPROCESS.out.versions) if ( !params.assembly_input ) { @@ -132,7 +132,10 @@ workflow MAG { ch_long_reads_preprocess = params.assembly_input ? Channel.empty() : ch_raw_long_reads - LONG_READ_PREPROCESS(ch_long_reads_preprocess, ch_short_reads) + LONG_READ_PREPROCESS ( + ch_long_reads_preprocess, + ch_short_reads + ) ch_versions = ch_versions.mix(LONG_READ_PREPROCESS.out.versions) if ( !params.assembly_input ) { @@ -146,7 +149,7 @@ workflow MAG { ================================================================================ */ - SHORT_READ_TAXONOMY(ch_short_reads) + SHORT_READ_TAXONOMY ( ch_short_reads ) ch_versions = ch_versions.mix(SHORT_READ_TAXONOMY.out.versions) /* @@ -156,7 +159,10 @@ workflow MAG { */ ch_assemblies = Channel.empty() - ASSEMBLY ( ch_short_reads_assembly, ch_long_reads ) + ASSEMBLY ( + ch_short_reads_assembly, + ch_long_reads + ) ch_assemblies = ch_assemblies.mix(ch_input_assemblies, ASSEMBLY.out.assemblies) ch_versions = ch_versions.mix(ASSEMBLY.out.versions) @@ -251,7 +257,7 @@ workflow MAG { * Bin QC subworkflows: for checking bin completeness with either BUSCO, CHECKM, and/or GUNC */ - BIN_QC(ch_input_for_postbinning_bins_unbins) + BIN_QC ( ch_input_for_postbinning_bins_unbins ) ch_versions = ch_versions.mix(BIN_QC.out.versions) // process information if BUSCO analysis failed for individual bins due to no matching genes @@ -261,18 +267,25 @@ workflow MAG { .splitCsv(sep: '\t') .map { bin, error -> if (!bin.contains(".unbinned.")) busco_failed_bins[bin] = error } } - + /* * Bin taxonomy subworkflows: for assigning taxonomy to bins with either GTDB-Tk or CAT */ - BIN_TAXONOMY(ch_input_for_postbinning_bins_unbins, BIN_QC.out.busco_summary, BIN_QC.out.checkm_summary) + BIN_TAXONOMY ( + ch_input_for_postbinning_bins_unbins, + BIN_QC.out.busco_summary, + 
BIN_QC.out.checkm_summary + ) ch_versions = ch_versions.mix(BIN_TAXONOMY.out.versions) /* * Annotation subworkflows: Prodigal and Prokka */ - ANNOTATION(ch_input_for_postbinning_bins_unbins, ch_assemblies) + ANNOTATION ( + ch_input_for_postbinning_bins_unbins, + ch_assemblies + ) ch_versions = ch_versions.mix(ANNOTATION.out.versions) /* From b55715da3d0addd35433b7fec554096e4acbf52d Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Thu, 18 May 2023 09:54:35 +0100 Subject: [PATCH 5/6] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50c5b95f..455d0137 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#429](https://github.com/nf-core/mag/pull/429) - Replaced hardcoded CheckM database auto-download URL to a parameter (reported by @erikrikarddaniel, fix by @jfy133) - [#434](https://github.com/nf-core/mag/pull/434) - Fix location of samplesheet for AWS full tests (reported by @Lfulcrum, fix by @jfy133) - [#438](https://github.com/nf-core/mag/pull/438) - Fixed version inconsistency between conda and containers for GTDBTK_CLASSIFYWF (by @jfy133) +- [#446](https://github.com/nf-core/mag/pull/446) - Reorganise main workflow code into subworkflows for increased modularity (by @prototaxites) ### `Dependencies` From d1db73b9e20c7b0040f868fab95176a6cc5b9d59 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 13 Jun 2023 10:22:42 +0000 Subject: [PATCH 6/6] Fix adapterremoval bug --- subworkflows/local/short_read_preprocess.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/short_read_preprocess.nf b/subworkflows/local/short_read_preprocess.nf index c03d3329..0e472224 100644 --- a/subworkflows/local/short_read_preprocess.nf +++ b/subworkflows/local/short_read_preprocess.nf @@ -67,7 +67,8 @@ workflow SHORT_READ_PREPROCESS { 
ADAPTERREMOVAL_PE ( ch_adapterremoval_in.paired, [] ) ADAPTERREMOVAL_SE ( ch_adapterremoval_in.single, [] ) - ch_short_reads_prepped = ch_short_reads.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) ch_multiqc_readprep = ch_multiqc_readprep.mix ( ADAPTERREMOVAL_PE.out.settings.collect{it[1]}.ifEmpty([]), ADAPTERREMOVAL_SE.out.settings.collect{it[1]}.ifEmpty([])