diff --git a/conf/modules.config b/conf/modules.config index 009334a..0c570a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,6 +39,10 @@ process { ext.prefix = { meta.id + '_scrublet' } } + withName: DOUBLETDETECTION { + ext.prefix = { meta.id + '_doubletdetection' } + } + withName: SCVITOOLS_SCVI { ext.prefix = { meta.id + '_scvi' } } diff --git a/conf/test_full.config b/conf/test_full.config index d102de9..826f7d0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -22,5 +22,5 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv' integration_methods = 'scvi,harmony,bbknn,combat,seurat' - doublet_detection = 'solo,scrublet' + doublet_detection = 'solo,scrublet,doubletdetection' } diff --git a/modules/local/adata/readrds/main.nf b/modules/local/adata/readrds/main.nf index ee60293..7f5ed95 100644 --- a/modules/local/adata/readrds/main.nf +++ b/modules/local/adata/readrds/main.nf @@ -12,7 +12,7 @@ process ADATA_READRDS { output: tuple val(meta), path("*.h5ad"), emit: h5ad - path("*.pkl") , emit: obsm + path("*.pkl") , emit: obsm, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/local/doublet_removal/environment.yml b/modules/local/doublet_detection/doublet_removal/environment.yml similarity index 100% rename from modules/local/doublet_removal/environment.yml rename to modules/local/doublet_detection/doublet_removal/environment.yml diff --git a/modules/local/doublet_removal/main.nf b/modules/local/doublet_detection/doublet_removal/main.nf similarity index 100% rename from modules/local/doublet_removal/main.nf rename to modules/local/doublet_detection/doublet_removal/main.nf diff --git a/modules/local/doublet_removal/templates/doublet_removal.py b/modules/local/doublet_detection/doublet_removal/templates/doublet_removal.py similarity index 100% rename from modules/local/doublet_removal/templates/doublet_removal.py rename to 
modules/local/doublet_detection/doublet_removal/templates/doublet_removal.py diff --git a/modules/local/doublet_detection/doubletdetection/environment.yml b/modules/local/doublet_detection/doubletdetection/environment.yml new file mode 100644 index 0000000..85ea234 --- /dev/null +++ b/modules/local/doublet_detection/doubletdetection/environment.yml @@ -0,0 +1,10 @@ +name: doubletdetection +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::anndata=0.10.7 + - conda-forge::louvain=0.8.2 + - pip + - pip: + - doubletdetection==4.2 diff --git a/modules/local/doublet_detection/doubletdetection/main.nf b/modules/local/doublet_detection/doubletdetection/main.nf new file mode 100644 index 0000000..3a25eec --- /dev/null +++ b/modules/local/doublet_detection/doubletdetection/main.nf @@ -0,0 +1,24 @@ +process DOUBLETDETECTION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:42d2326cc250350b': + 'community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:cbe92394c10372fa' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + tuple val(meta), path("*.pkl") , emit: predictions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'doubletdetection.py' +} diff --git a/modules/local/doublet_detection/doubletdetection/templates/doubletdetection.py b/modules/local/doublet_detection/doubletdetection/templates/doubletdetection.py new file mode 100644 index 0000000..b1119d2 --- /dev/null +++ b/modules/local/doublet_detection/doubletdetection/templates/doubletdetection.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +# Set numba cache to /tmp +# This is not an ideal solution +import os +os.environ["NUMBA_CACHE_DIR"] = "/tmp" + +import anndata as ad +import doubletdetection +import platform + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +adata = ad.read_h5ad("${h5ad}") + +counts = adata.layers["counts"] + +clf = doubletdetection.BoostClassifier() +doublets = clf.fit(counts).predict() +scores = clf.doublet_score() + +adata.obs["doublet"] = [label == 1 for label in doublets] +adata.obs["doublet_score"] = scores + +adata.write_h5ad("${prefix}.h5ad") + +df = adata.obs[["doublet"]] +df.columns = ["${prefix}"] +df.to_pickle("${prefix}.pkl") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "anndata": ad.__version__, + "doubletdetection": doubletdetection.__version__, + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/subworkflows/local/doublet_detection.nf b/subworkflows/local/doublet_detection.nf index 523053b..e4c006f 100644 --- a/subworkflows/local/doublet_detection.nf +++ b/subworkflows/local/doublet_detection.nf @@ -1,6 +1,7 @@ -include { SCVITOOLS_SOLO } from '../../modules/local/scvitools/solo' -include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet' -include { DOUBLET_REMOVAL } from '../../modules/local/doublet_removal' +include { SCVITOOLS_SOLO } from '../../modules/local/scvitools/solo' +include { SCANPY_SCRUBLET } from '../../modules/local/scanpy/scrublet' +include { DOUBLETDETECTION } from '../../modules/local/doublet_detection/doubletdetection' +include { DOUBLET_REMOVAL } from '../../modules/local/doublet_detection/doublet_removal' workflow DOUBLET_DETECTION { take: @@ -25,6 +26,12 @@ workflow DOUBLET_DETECTION { ch_versions = SCANPY_SCRUBLET.out.versions } + if (methods.contains('doubletdetection')) { + DOUBLETDETECTION(ch_h5ad) + ch_predictions = ch_predictions.mix(DOUBLETDETECTION.out.predictions) + ch_versions = 
ch_versions.mix(DOUBLETDETECTION.out.versions) + } + DOUBLET_REMOVAL( ch_h5ad.join(ch_predictions.groupTuple()), params.doublet_detection_threshold