Skip to content

Commit

Permalink
Merge pull request #37 from nf-core/16-implement-doubletdetection-dou…
Browse files Browse the repository at this point in the history
…blet-detection

Implement doubletdetection doublet detection
  • Loading branch information
nictru authored Jun 15, 2024
2 parents 5a26e99 + f59450d commit 0ec199c
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 5 deletions.
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ process {
ext.prefix = { meta.id + '_scrublet' }
}

withName: DOUBLETDETECTION {
ext.prefix = { meta.id + '_doubletdetection' }
}

withName: SCVITOOLS_SCVI {
ext.prefix = { meta.id + '_scvi' }
}
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ params {
// Input data for full size test
input = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv'
integration_methods = 'scvi,harmony,bbknn,combat,seurat'
doublet_detection = 'solo,scrublet'
doublet_detection = 'solo,scrublet,doubletdetection'
}
2 changes: 1 addition & 1 deletion modules/local/adata/readrds/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ process ADATA_READRDS {

output:
tuple val(meta), path("*.h5ad"), emit: h5ad
path("*.pkl") , emit: obsm
path("*.pkl") , emit: obsm, optional: true
path "versions.yml" , emit: versions

when:
Expand Down
File renamed without changes.
10 changes: 10 additions & 0 deletions modules/local/doublet_detection/doubletdetection/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Conda environment for the doubletdetection module.
# anndata and louvain are pinned from conda-forge; doubletdetection itself is
# installed through pip at the pinned version listed under the pip entry.
name: doubletdetection
channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::anndata=0.10.7
- conda-forge::louvain=0.8.2
- pip
- pip:
- doubletdetection==4.2
24 changes: 24 additions & 0 deletions modules/local/doublet_detection/doubletdetection/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Runs the doubletdetection.py template on an input h5ad file that arrives
// together with its meta map, and emits the resulting h5ad file, the pickled
// predictions and a versions.yml file.
process DOUBLETDETECTION {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
// When the container engine is singularity and ext.singularity_pull_docker_container
// is not set, the oras image is used; in every other case the second image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:42d2326cc250350b':
'community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:cbe92394c10372fa' }"

input:
// meta is read for its id (tag and default prefix); h5ad is the staged input file.
tuple val(meta), path(h5ad)

output:
// Files are collected by extension from the task work directory.
tuple val(meta), path("*.h5ad"), emit: h5ad
tuple val(meta), path("*.pkl") , emit: predictions
path "versions.yml" , emit: versions

when:
// Runs unless ext.when is defined and evaluates to false.
task.ext.when == null || task.ext.when

script:
// Output file prefix: ext.prefix when configured, otherwise the sample id from meta.
prefix = task.ext.prefix ?: "${meta.id}"
template 'doubletdetection.py'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3

# Redirect the numba cache directory to /tmp (set before the remaining imports).
# NOTE: this is a workaround rather than an ideal solution.
import os
os.environ["NUMBA_CACHE_DIR"] = "/tmp"

import anndata as ad
import doubletdetection
import platform

def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Render a (possibly nested) dictionary as YAML-like text.

    Every key is emitted on its own line, prefixed by the padding string
    repeated ``indent`` times. A value that is itself a dictionary is rendered
    recursively one level deeper, directly below its key; any other value is
    interpolated after the key on the same line.

    Args:
        data (dict): The dictionary to render.
        indent (int): The nesting level of ``data``; 0 for the top level.

    Returns:
        str: The rendered text, or an empty string when ``data`` is empty.
    """
    pad = " " * indent
    chunks = []
    for name, entry in data.items():
        if not isinstance(entry, dict):
            # Leaf value: key and value share one line.
            chunks.append(f"{pad}{name}: {entry}\\n")
            continue
        # Nested mapping: key on its own line, children one level deeper.
        nested = format_yaml_like(entry, indent + 1)
        chunks.append(f"{pad}{name}:\\n{nested}")
    return "".join(chunks)

# Read the input AnnData file; the path is substituted by the workflow template.
adata = ad.read_h5ad("${h5ad}")

# Fit the classifier on the matrix stored in the "counts" layer, then collect
# the predicted labels and the doublet scores.
classifier = doubletdetection.BoostClassifier()
labels = classifier.fit(adata.layers["counts"]).predict()
doublet_scores = classifier.doublet_score()

# An observation is flagged as a doublet only when its label equals 1.
adata.obs["doublet"] = [lab == 1 for lab in labels]
adata.obs["doublet_score"] = doublet_scores

adata.write_h5ad("${prefix}.h5ad")

# Export the boolean calls as a single-column table named after the prefix.
predictions = adata.obs[["doublet"]]
predictions.columns = ["${prefix}"]
predictions.to_pickle("${prefix}.pkl")

# Versions

# Tool versions are grouped under the name of the running process.
process_versions = {
    "python": platform.python_version(),
    "anndata": ad.__version__,
    "doubletdetection": doubletdetection.__version__,
}
versions = {"${task.process}": process_versions}

with open("versions.yml", "w") as handle:
    handle.write(format_yaml_like(versions))
13 changes: 10 additions & 3 deletions subworkflows/local/doublet_detection.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include { SCVITOOLS_SOLO } from '../../modules/local/scvitools/solo'
include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet'
include { DOUBLET_REMOVAL } from '../../modules/local/doublet_removal'
include { SCVITOOLS_SOLO } from '../../modules/local/scvitools/solo'
include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet'
include { DOUBLETDETECTION } from '../../modules/local/doublet_detection/doubletdetection'
include { DOUBLET_REMOVAL } from '../../modules/local/doublet_detection/doublet_removal'

workflow DOUBLET_DETECTION {
take:
Expand All @@ -25,6 +26,12 @@ workflow DOUBLET_DETECTION {
ch_versions = SCANPY_SCRUBLET.out.versions
}

if (methods.contains('doubletdetection')) {
DOUBLETDETECTION(ch_h5ad)
ch_predictions = ch_predictions.mix(DOUBLETDETECTION.out.predictions)
ch_versions = DOUBLETDETECTION.out.versions
}

DOUBLET_REMOVAL(
ch_h5ad.join(ch_predictions.groupTuple()),
params.doublet_detection_threshold
Expand Down

0 comments on commit 0ec199c

Please sign in to comment.