Skip to content

Commit

Permalink
Merge pull request #38 from nf-core/19-implement-scds-doublet-detection
Browse files Browse the repository at this point in the history
Implement scds doublet detection
  • Loading branch information
nictru authored Jun 15, 2024
2 parents 0ec199c + 889aba5 commit e1d1c61
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 4 deletions.
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ process {
ext.prefix = { meta.id + '_doubletdetection' }
}

withName: SCDS {
ext.prefix = { meta.id + '_scds' }
}

withName: SCVITOOLS_SCVI {
ext.prefix = { meta.id + '_scvi' }
}
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ params {
// Input data for full size test
input = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv'
integration_methods = 'scvi,harmony,bbknn,combat,seurat'
doublet_detection = 'solo,scrublet,doubletdetection'
doublet_detection = 'solo,scrublet,doubletdetection,scds'
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
threshold = int("${threshold}")
prefix = "${prefix}"

predictions = pd.concat([pd.read_pickle(f) for f in "${predictions}".split()], axis=1)
def load(path: str) -> pd.DataFrame:
    """Read one tool's per-cell doublet predictions from disk.

    Supports the two on-disk formats produced by the upstream doublet
    detection modules: pickled pandas DataFrames (``.pkl``) and CSV files
    whose first column holds the cell barcode index (``.csv``).

    Args:
        path: Path to a ``.pkl`` or ``.csv`` predictions file.

    Returns:
        A DataFrame indexed by cell barcode with one boolean column per tool.

    Raises:
        ValueError: If the file extension is not recognised. The original
            implementation fell through and returned ``None``, which made
            the downstream ``pd.concat`` fail with a confusing error far
            from the actual cause.
    """
    if path.endswith(".pkl"):
        return pd.read_pickle(path)
    if path.endswith(".csv"):
        return pd.read_csv(path, index_col=0)
    raise ValueError(f"Unsupported predictions file format: {path}")

predictions = pd.concat([load(f) for f in "${predictions}".split()], axis=1)
mask = predictions.sum(axis=1) >= threshold

adata = adata[mask, :]
Expand Down
6 changes: 6 additions & 0 deletions modules/local/doublet_detection/scds/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the SCDS module.
# scds is a Bioconductor package distributed via Bioconda; the version is
# pinned to match the container tags declared in the module's main.nf.
name: scds
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::bioconductor-scds=1.18.0
24 changes: 24 additions & 0 deletions modules/local/doublet_detection/scds/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Run scds doublet detection on a per-sample SingleCellExperiment (.rds).
// Emits the annotated object (*.rds) plus a one-column CSV of per-cell
// doublet calls (*.csv) that the DOUBLET_REMOVAL step consumes; the CSV
// column is named via ext.prefix (set in conf/modules.config).
// The actual logic lives in templates/scds.R.
process SCDS {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
// Seqera Wave containers built from the environment.yml above; the ORAS
// image is used under Singularity, the plain image otherwise.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/bioconductor-scds:1.18.0--b0910f04d88fb193':
'community.wave.seqera.io/library/bioconductor-scds:1.18.0--aaf652129cf65197' }"

input:
// rds: a serialized SingleCellExperiment produced by ADATA_TORDS
tuple val(meta), path(rds)

output:
tuple val(meta), path("*.rds"), emit: rds
tuple val(meta), path("*.csv"), emit: predictions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// prefix is interpolated into the R template for output file names
prefix = task.ext.prefix ?: "${meta.id}"
template 'scds.R'
}
51 changes: 51 additions & 0 deletions modules/local/doublet_detection/scds/templates/scds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env Rscript

# Doublet detection with the Bioconductor package scds.
# Scores each cell of the input SingleCellExperiment and writes:
#   - ${prefix}.rds : the annotated SingleCellExperiment
#   - ${prefix}.csv : one boolean call per cell (column named ${prefix})
#   - versions.yml  : tool versions for MultiQC/provenance
# NOTE: this is a Nextflow template; ${rds}, ${prefix} and ${task.process}
# are substituted by Nextflow before the script runs.

library(scds)
library(SingleCellExperiment)

sce <- readRDS("${rds}")

## Annotate doublet using binary classification based doublet scoring:
sce <- bcds(sce, retRes = TRUE, estNdbl=TRUE)

## Annotate doublet using co-expression based doublet scoring:
# cxds is treated as optional: it is wrapped in try() so a failure
# (presumably possible on some inputs — TODO confirm failure modes)
# falls back to bcds-only calls below instead of aborting the process.
try({
sce <- cxds(sce, retRes = TRUE, estNdbl=TRUE)
})

### If cxds worked, run hybrid, otherwise use bcds annotations
# Success of cxds is detected by the presence of its score column in colData.
if ("cxds_score" %in% colnames(colData(sce))) {
## Combine both annotations into a hybrid annotation
sce <- cxds_bcds_hybrid(sce, estNdbl=TRUE)

# drop=FALSE keeps a one-column DataFrame rather than collapsing to a vector
predictions <- colData(sce)[, 'hybrid_call', drop=FALSE]
} else {
predictions <- colData(sce)[, 'bcds_call', drop=FALSE]
}

saveRDS(sce, "${prefix}.rds")

# Name the single prediction column after the sample prefix so downstream
# merging can concatenate per-tool columns side by side.
colnames(predictions) <- "${prefix}"
write.csv(predictions, "${prefix}.csv")

################################################
################################################
## VERSIONS FILE ##
################################################
################################################

# Third whitespace-separated token of e.g. "R version 4.3.1 (...)" -> "4.3.1"
r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
scds.version <- as.character(packageVersion('scds'))

writeLines(
c(
'"${task.process}":',
paste(' R:', r.version),
paste(' scds:', scds.version)
),
'versions.yml')

################################################
################################################
################################################
################################################
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
"type": "string",
"default": "solo",
"description": "Specify the tools to use for doublet detection.",
"help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: solo, scrublet",
"pattern": "^((solo|scrublet)?,?)*[^,]+$"
"help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: solo, scrublet, doubletdetection, scds",
"pattern": "^((solo|scrublet|doubletdetection|scds)?,?)*[^,]+$"
},
"doublet_detection_threshold": {
"type": "integer",
Expand Down
12 changes: 12 additions & 0 deletions subworkflows/local/doublet_detection.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
include { ADATA_TORDS } from '../../modules/local/adata/tords'
include { SCVITOOLS_SOLO } from '../../modules/local/scvitools/solo'
include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet'
include { DOUBLETDETECTION } from '../../modules/local/doublet_detection/doubletdetection'
include { SCDS } from '../../modules/local/doublet_detection/scds'
include { DOUBLET_REMOVAL } from '../../modules/local/doublet_detection/doublet_removal'

workflow DOUBLET_DETECTION {
Expand All @@ -12,6 +14,10 @@ workflow DOUBLET_DETECTION {
ch_multiqc_files = Channel.empty()
ch_predictions = Channel.empty()

ADATA_TORDS(ch_h5ad)
ch_versions = ch_versions.mix(ADATA_TORDS.out.versions)
ch_rds = ADATA_TORDS.out.rds

methods = params.doublet_detection.split(',').collect{it.trim().toLowerCase()}

if (methods.contains('solo')) {
Expand All @@ -32,6 +38,12 @@ workflow DOUBLET_DETECTION {
ch_versions = DOUBLETDETECTION.out.versions
}

// Optionally run scds doublet detection on the RDS-converted input.
if (methods.contains('scds')) {
SCDS(ch_rds)
ch_predictions = ch_predictions.mix(SCDS.out.predictions)
// Use mix() rather than plain assignment: assigning would discard the
// versions already collected from earlier steps (e.g. ADATA_TORDS and
// any previously selected doublet detectors).
ch_versions = ch_versions.mix(SCDS.out.versions)
}

DOUBLET_REMOVAL(
ch_h5ad.join(ch_predictions.groupTuple()),
params.doublet_detection_threshold
Expand Down

0 comments on commit e1d1c61

Please sign in to comment.