From 0a95f9d226ff835c82313a46f344387a7a329601 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Wed, 2 Oct 2024 10:42:11 +0200 Subject: [PATCH] Add preprocess_only parameter --- nextflow.config | 1 + nextflow_schema.json | 5 ++ .../main.nf | 4 ++ workflows/scdownstream.nf | 58 ++++++++++--------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/nextflow.config b/nextflow.config index e34005a..0bce9d2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ params { reference_model_type = 'scvi' // Pipeline options + preprocess_only = false memory_scale = 1 ambient_removal = 'decontx' doublet_detection = 'scrublet' diff --git a/nextflow_schema.json b/nextflow_schema.json index 312951c..202d99a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -90,6 +90,11 @@ "type": "object", "description": "Options for selecting which tools should be used for certain tasks", "properties": { + "preprocess_only": { + "type": "boolean", + "default": false, + "description": "Only run the preprocessing steps, skip the integration and clustering steps" + }, "memory_scale": { "type": "integer", "default": 1, diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf index 242791c..449b4ca 100644 --- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf @@ -147,6 +147,10 @@ def validateInputParameters() { throw new Exception("Either an input samplesheet or (base_adata && base_embeddings && base_label_col) must be provided") } + if (params.preprocess_only && !params.input) { + throw new Exception("If preprocess_only is set to true, an input samplesheet must be provided") + } + if (params.base_adata && params.input && !params.reference_model) { throw new Exception("If a base adata file is provided and a samplesheet is provided, a reference model must also be provided") } diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf index e94dbb7..a07e7c3 100644 --- a/workflows/scdownstream.nf +++ b/workflows/scdownstream.nf @@ -54,23 +54,25 @@ workflow SCDOWNSTREAM { // Perform automated celltype assignment // - CELLTYPE_ASSIGNMENT(ch_h5ad) - ch_versions = ch_versions.mix(CELLTYPE_ASSIGNMENT.out.versions) - ch_h5ad = CELLTYPE_ASSIGNMENT.out.h5ad - - // - // Combine samples and perform integration - // - - COMBINE(ch_h5ad, ch_base, ch_reference_model) - ch_versions = ch_versions.mix(COMBINE.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(COMBINE.out.multiqc_files) - ch_obs = ch_obs.mix(COMBINE.out.obs) - ch_obsm = ch_obsm.mix(COMBINE.out.obsm) - ch_layers = ch_layers.mix(COMBINE.out.layers) - ch_integrations = ch_integrations.mix(COMBINE.out.integrations) - - ch_finalization_base = COMBINE.out.h5ad + if (!params.preprocess_only) { + CELLTYPE_ASSIGNMENT(ch_h5ad) + ch_versions = ch_versions.mix(CELLTYPE_ASSIGNMENT.out.versions) + ch_h5ad = CELLTYPE_ASSIGNMENT.out.h5ad + + // + // Combine samples and perform integration + // + + COMBINE(ch_h5ad, ch_base, ch_reference_model) + ch_versions = ch_versions.mix(COMBINE.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(COMBINE.out.multiqc_files) + ch_obs = ch_obs.mix(COMBINE.out.obs) + ch_obsm = ch_obsm.mix(COMBINE.out.obsm) + ch_layers = ch_layers.mix(COMBINE.out.layers) + ch_integrations = ch_integrations.mix(COMBINE.out.integrations) + + ch_finalization_base = COMBINE.out.h5ad + } } else { ch_embeddings = Channel.value(params.base_embeddings.split(',').collect{it.trim()}) @@ -90,16 +92,18 @@ workflow SCDOWNSTREAM { // Perform clustering and per-cluster analysis // - CLUSTER(ch_integrations) - ch_versions = ch_versions.mix(CLUSTER.out.versions) - ch_obs = ch_obs.mix(CLUSTER.out.obs) - ch_obsm = ch_obsm.mix(CLUSTER.out.obsm) - ch_obsp = ch_obsp.mix(CLUSTER.out.obsp) - ch_uns = ch_uns.mix(CLUSTER.out.uns) - ch_multiqc_files = ch_multiqc_files.mix(CLUSTER.out.multiqc_files) - - FINALIZE(ch_finalization_base, ch_obs, ch_obsm, ch_obsp, ch_uns, ch_layers) - ch_versions = ch_versions.mix(FINALIZE.out.versions) + if (!params.preprocess_only) { + CLUSTER(ch_integrations) + ch_versions = ch_versions.mix(CLUSTER.out.versions) + ch_obs = ch_obs.mix(CLUSTER.out.obs) + ch_obsm = ch_obsm.mix(CLUSTER.out.obsm) + ch_obsp = ch_obsp.mix(CLUSTER.out.obsp) + ch_uns = ch_uns.mix(CLUSTER.out.uns) + ch_multiqc_files = ch_multiqc_files.mix(CLUSTER.out.multiqc_files) + + FINALIZE(ch_finalization_base, ch_obs, ch_obsm, ch_obsp, ch_uns, ch_layers) + ch_versions = ch_versions.mix(FINALIZE.out.versions) + } // // Collate and save software versions