Add preprocess_only parameter

nf-core · Oct 2, 2024 · 0a95f9d · 0a95f9d
1 parent 7c5d731
commit 0a95f9d
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 27 deletions.
diff --git a/nextflow.config b/nextflow.config
@@ -21,6 +21,7 @@ params {
     reference_model_type         = 'scvi'
 
     // Pipeline options
+    preprocess_only              = false
     memory_scale                 = 1
     ambient_removal              = 'decontx'
     doublet_detection            = 'scrublet'

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -90,6 +90,11 @@
             "type": "object",
             "description": "Options for selecting which tools should be used for certain tasks",
             "properties": {
+                "preprocess_only": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "Only run the preprocessing steps; skip cell type assignment, integration, clustering and finalization"
+                },
                 "memory_scale": {
                     "type": "integer",
                     "default": 1,

diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
@@ -147,6 +147,10 @@ def validateInputParameters() {
         throw new Exception("Either an input samplesheet or (base_adata && base_embeddings && base_label_col) must be provided")
     }
 
+    if (params.preprocess_only && !params.input) {
+        throw new Exception("If preprocess_only is set to true, an input samplesheet must be provided")
+    }
+
     if (params.base_adata && params.input && !params.reference_model) {
         throw new Exception("If a base adata file is provided and a samplesheet is provided, a reference model must also be provided")
     }

diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf
@@ -54,23 +54,25 @@ workflow SCDOWNSTREAM {
         // Perform automated celltype assignment
         //
 
-        CELLTYPE_ASSIGNMENT(ch_h5ad)
-        ch_versions = ch_versions.mix(CELLTYPE_ASSIGNMENT.out.versions)
-        ch_h5ad = CELLTYPE_ASSIGNMENT.out.h5ad
-
-        //
-        // Combine samples and perform integration
-        //
-
-        COMBINE(ch_h5ad, ch_base, ch_reference_model)
-        ch_versions      = ch_versions.mix(COMBINE.out.versions)
-        ch_multiqc_files = ch_multiqc_files.mix(COMBINE.out.multiqc_files)
-        ch_obs           = ch_obs.mix(COMBINE.out.obs)
-        ch_obsm          = ch_obsm.mix(COMBINE.out.obsm)
-        ch_layers        = ch_layers.mix(COMBINE.out.layers)
-        ch_integrations  = ch_integrations.mix(COMBINE.out.integrations)
-
-        ch_finalization_base = COMBINE.out.h5ad
+        if (!params.preprocess_only) {
+            CELLTYPE_ASSIGNMENT(ch_h5ad)
+            ch_versions = ch_versions.mix(CELLTYPE_ASSIGNMENT.out.versions)
+            ch_h5ad = CELLTYPE_ASSIGNMENT.out.h5ad
+
+            //
+            // Combine samples and perform integration
+            //
+
+            COMBINE(ch_h5ad, ch_base, ch_reference_model)
+            ch_versions      = ch_versions.mix(COMBINE.out.versions)
+            ch_multiqc_files = ch_multiqc_files.mix(COMBINE.out.multiqc_files)
+            ch_obs           = ch_obs.mix(COMBINE.out.obs)
+            ch_obsm          = ch_obsm.mix(COMBINE.out.obsm)
+            ch_layers        = ch_layers.mix(COMBINE.out.layers)
+            ch_integrations  = ch_integrations.mix(COMBINE.out.integrations)
+
+            ch_finalization_base = COMBINE.out.h5ad
+        }
     } else {
         ch_embeddings = Channel.value(params.base_embeddings.split(',').collect{it.trim()})
 
@@ -90,16 +92,18 @@ workflow SCDOWNSTREAM {
     // Perform clustering and per-cluster analysis
     //
 
-    CLUSTER(ch_integrations)
-    ch_versions = ch_versions.mix(CLUSTER.out.versions)
-    ch_obs = ch_obs.mix(CLUSTER.out.obs)
-    ch_obsm = ch_obsm.mix(CLUSTER.out.obsm)
-    ch_obsp = ch_obsp.mix(CLUSTER.out.obsp)
-    ch_uns = ch_uns.mix(CLUSTER.out.uns)
-    ch_multiqc_files = ch_multiqc_files.mix(CLUSTER.out.multiqc_files)
-
-    FINALIZE(ch_finalization_base, ch_obs, ch_obsm, ch_obsp, ch_uns, ch_layers)
-    ch_versions = ch_versions.mix(FINALIZE.out.versions)
+    if (!params.preprocess_only) {
+        CLUSTER(ch_integrations)
+        ch_versions = ch_versions.mix(CLUSTER.out.versions)
+        ch_obs = ch_obs.mix(CLUSTER.out.obs)
+        ch_obsm = ch_obsm.mix(CLUSTER.out.obsm)
+        ch_obsp = ch_obsp.mix(CLUSTER.out.obsp)
+        ch_uns = ch_uns.mix(CLUSTER.out.uns)
+        ch_multiqc_files = ch_multiqc_files.mix(CLUSTER.out.multiqc_files)
+
+        FINALIZE(ch_finalization_base, ch_obs, ch_obsm, ch_obsp, ch_uns, ch_layers)
+        ch_versions = ch_versions.mix(FINALIZE.out.versions)
+    }
 
     //
     // Collate and save software versions