Merge pull request #37 from nf-core/16-implement-doubletdetection-doublet-detection

Implement doubletdetection doublet detection
nf-core · Jun 15, 2024 · 0ec199c · 0ec199c
2 parents 5a26e99 + f59450d
commit 0ec199c
Show file tree

Hide file tree

Showing 10 changed files with 109 additions and 5 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -39,6 +39,10 @@ process {
         ext.prefix = { meta.id + '_scrublet' }
     }
 
+    withName: DOUBLETDETECTION {
+        ext.prefix = { meta.id + '_doubletdetection' }
+    }
+
     withName: SCVITOOLS_SCVI {
         ext.prefix = { meta.id + '_scvi' }
     }

diff --git a/conf/test_full.config b/conf/test_full.config
@@ -22,5 +22,5 @@ params {
     // Input data for full size test
     input  = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv'
     integration_methods = 'scvi,harmony,bbknn,combat,seurat'
-    doublet_detection = 'solo,scrublet'
+    doublet_detection = 'solo,scrublet,doubletdetection'
 }
diff --git a/modules/local/adata/readrds/main.nf b/modules/local/adata/readrds/main.nf
@@ -12,7 +12,7 @@ process ADATA_READRDS {
 
     output:
     tuple val(meta), path("*.h5ad"), emit: h5ad
-    path("*.pkl")                  , emit: obsm
+    path("*.pkl")                  , emit: obsm, optional: true
     path "versions.yml"            , emit: versions
 
     when:

diff --git a/...les/local/doublet_removal/environment.yml → ...detection/doublet_removal/environment.yml b/...les/local/doublet_removal/environment.yml → ...detection/doublet_removal/environment.yml
diff --git a/modules/local/doublet_removal/main.nf → ...doublet_detection/doublet_removal/main.nf b/modules/local/doublet_removal/main.nf → ...doublet_detection/doublet_removal/main.nf
diff --git a/...blet_removal/templates/doublet_removal.py → ...blet_removal/templates/doublet_removal.py b/...blet_removal/templates/doublet_removal.py → ...blet_removal/templates/doublet_removal.py
diff --git a/modules/local/doublet_detection/doubletdetection/environment.yml b/modules/local/doublet_detection/doubletdetection/environment.yml
@@ -0,0 +1,10 @@
+name: doubletdetection
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::anndata=0.10.7
+  - conda-forge::louvain=0.8.2
+  - pip  # provides the installer used by the pip: section below
+  - pip:
+      - doubletdetection==4.2  # installed with pip rather than from a conda channel
diff --git a/modules/local/doublet_detection/doubletdetection/main.nf b/modules/local/doublet_detection/doubletdetection/main.nf
@@ -0,0 +1,24 @@
+process DOUBLETDETECTION { // runs the doubletdetection.py template on one (meta, h5ad) input tuple
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml" // conda environment file located next to this module
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:42d2326cc250350b':
+        'community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:cbe92394c10372fa' }" // oras:// image for Singularity unless ext.singularity_pull_docker_container is set; otherwise the second image
+
+    input:
+    tuple val(meta), path(h5ad) // meta must carry .id (used for tag and the default prefix); h5ad is read by the template
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad // AnnData file the template writes as <prefix>.h5ad
+    tuple val(meta), path("*.pkl") , emit: predictions // pickled one-column DataFrame of boolean doublet calls (<prefix>.pkl)
+    path "versions.yml"            , emit: versions // written by the last statement of the template
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}" // output basename used by the template; defaults to meta.id when ext.prefix is unset
+    template 'doubletdetection.py' // script body comes from templates/doubletdetection.py
+}
diff --git a/modules/local/doublet_detection/doubletdetection/templates/doubletdetection.py b/modules/local/doublet_detection/doubletdetection/templates/doubletdetection.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Set numba cache to /tmp
+# This is not an ideal solution
+import os
+os.environ["NUMBA_CACHE_DIR"] = "/tmp"
+
+import anndata as ad
+import doubletdetection
+import platform
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent  # two spaces per nesting level
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"  # nested dict: key on its own line, children one level deeper
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"  # leaf; NOTE(review): backslash is doubled presumably because this file is a Nextflow template - confirm
+    return yaml_str
+
+adata = ad.read_h5ad("${h5ad}")  # the h5ad placeholder is filled in by Nextflow before Python runs
+
+counts = adata.layers["counts"]  # classifier input is taken from the "counts" layer, not from adata.X
+
+clf = doubletdetection.BoostClassifier()  # library defaults, no parameters overridden
+doublets = clf.fit(counts).predict()  # labels; stored per cell below
+scores = clf.doublet_score()
+
+adata.obs["doublet"] = [label == 1 for label in doublets]  # True only where the label equals 1; every other value becomes False
+adata.obs["doublet_score"] = scores
+
+adata.write_h5ad("${prefix}.h5ad")  # annotated copy, collected by the process h5ad output
+
+df = adata.obs[["doublet"]]  # one-column frame holding only the boolean calls
+df.columns = ["${prefix}"]  # column named after the prefix; presumably lets downstream tell methods apart - confirm
+df.to_pickle("${prefix}.pkl")  # collected by the process predictions output
+
+# Versions
+
+versions = {
+    "${task.process}": {  # top-level key is the Nextflow process name
+        "python": platform.python_version(),
+        "anndata": ad.__version__,
+        "doubletdetection": doubletdetection.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))  # rendered with the helper defined above in this script
diff --git a/subworkflows/local/doublet_detection.nf b/subworkflows/local/doublet_detection.nf
@@ -1,6 +1,7 @@
-include { SCVITOOLS_SOLO  } from '../../modules/local/scvitools/solo'
-include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet'
-include { DOUBLET_REMOVAL } from '../../modules/local/doublet_removal'
+include { SCVITOOLS_SOLO   } from '../../modules/local/scvitools/solo'
+include { SCANPY_SCRUBLET  } from '../../modules/local/scanpy/scrublet'
+include { DOUBLETDETECTION } from '../../modules/local/doublet_detection/doubletdetection'
+include { DOUBLET_REMOVAL  } from '../../modules/local/doublet_detection/doublet_removal'
 
 workflow DOUBLET_DETECTION {
     take:
@@ -25,6 +26,12 @@ workflow DOUBLET_DETECTION {
         ch_versions = SCANPY_SCRUBLET.out.versions
     }
 
+    if (methods.contains('doubletdetection')) {
+        // Run doubletdetection on every input h5ad and add its predictions to the shared channel.
+        DOUBLETDETECTION(ch_h5ad)
+        ch_predictions = ch_predictions.mix(DOUBLETDETECTION.out.predictions)
+        // Accumulate rather than overwrite, so versions recorded by methods that ran
+        // earlier in this workflow are kept when several methods are enabled.
+        // Assumes ch_versions is already a channel here (initialised above this hunk) - confirm.
+        ch_versions = ch_versions.mix(DOUBLETDETECTION.out.versions)
+    }
+
     DOUBLET_REMOVAL(
         ch_h5ad.join(ch_predictions.groupTuple()),
         params.doublet_detection_threshold