Skip to content

Commit

Permalink
Merge branch 'v0.2.2_development' into krishnan_getters
Browse files Browse the repository at this point in the history
  • Loading branch information
jmitchell81 authored Dec 7, 2023
2 parents e0830e6 + 22022c9 commit a42e56d
Show file tree
Hide file tree
Showing 24 changed files with 291 additions and 3,885,922 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ biocViews:
Network
Suggests:
knitr,
loomR,
patchwork,
rmarkdown,
Seurat,
testthat
testthat,
formatR
VignetteBuilder: knitr
Language: en-US
Roxygen: list(markdown = TRUE)
12 changes: 0 additions & 12 deletions R/data.R

This file was deleted.

2 changes: 1 addition & 1 deletion R/plot_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ cor_scatter <- function(dom, tf, rec, remove_rec_dropout = TRUE, ...) {
}
dat <- data.frame(rec = rec_z_scores, tf = tar_tf_scores)
ggscatter(dat, x = "rec", y = "tf", add = "reg.line", conf.int = FALSE, cor.coef = FALSE,
cor.method = "pearson", xlab = rec, ylab = tf, size = 0.25)
cor.method = "pearson", xlab = rec, ylab = tf, size = 0.25, ...)
}
#' Plot expression of a receptor's ligands by other cell types as a chord plot
#'
Expand Down
Binary file added R/sysdata.rda
Binary file not shown.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[![R build status](https://github.com/FertigLab/domino2/workflows/r-build-check/badge.svg)](https://github.com/FertigLab/domino2/actions?workflow=r-build-check)

## Domino2: Inferring Cell Signaling from Single Cell RNA Sequencing Data

Domino2 is an updated version of the original [domino](https://github.com/Elisseeff-Lab/domino) R package published in Nature Biomedical Engineering in [Computational reconstruction of the signalling networks surrounding implanted biomaterials from single-cell transcriptomics](https://doi.org/10.1038/s41551-021-00770-5). Domino2 is a tool for analysis of intra- and intercellular signaling in single cell RNA sequencing data based on transcription factor activation and receptor and ligand linkages.
Expand Down
49 changes: 39 additions & 10 deletions data-raw/example_data.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
# Code for preparing example objects for use in vignettes
# Accessible to users for exploration as well
library(Seurat)
pbmc.data <- Read10X(data.dir = "../inst/extdata/pbmc3k_filtered_gene_bc_matrices")
library(domino2)

# Zenodo host of outputs from SCENIC analysis
data_url <- "https://zenodo.org/records/10222767/files"
temp_dir <- tempdir()

# install 10X Genomics PBMC3K data
pbmc_url <- "https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"
pbmc_tar <- paste0(temp_dir, "/pbmc3k_filtered_gene_bc_matrices.tar.gz")
download.file(url = pbmc_url, destfile = pbmc_tar)
pbmc_dir <- paste0(temp_dir, "/pbmc3k_filtered_gene_bc_matrices")
untar(tarfile = pbmc_tar, exdir = pbmc_dir)
pbmc.data <- Read10X(data.dir = paste0(pbmc_dir, "/filtered_gene_bc_matrices/hg19"))

pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
Expand All @@ -23,12 +36,22 @@ pbmc$cell_type <-
to = cell_dict$cell_type
)

# Save Seurat object for generating test data
saveRDS(pbmc, "inst/extdata/pbmc_seurat.rds")

# Load SCENIC outputs
scenic_dir = "../inst/extdata/scenic/"
regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
scenic_dir <- paste0(temp_dir, "/scenic")
if (!dir.exists(scenic_dir)) {
dir.create(scenic_dir)
}
download.file(url = paste0(data_url, "/scenic_auc_pbmc_3k.csv"),
destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv"))
download.file(url = paste0(data_url, "/scenic_regulons_pbmc_3k.csv"),
destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"),
header = TRUE, row.names = 1,
stringsAsFactors = FALSE, sep = ",")
regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))

# Create list of SCENIC regulons
regulons <- regulons[-1:-2,]
Expand All @@ -41,11 +64,17 @@ auc_in <- as.data.frame(t(auc))
rownames(auc_in) <- gsub("\\.\\.\\.$", "", rownames(auc_in))

# Load CellPhoneDB Database
cellphonedb_2_path <- "../inst/extdata/cellphoneDB_v4"
complexes <- read.csv(paste0(cellphonedb_2_path, "/complex_input.csv"), stringsAsFactors = FALSE)
genes <- read.csv(paste0(cellphonedb_2_path, "/gene_input.csv"), stringsAsFactors = FALSE)
interactions <- read.csv(paste0(cellphonedb_2_path, "/interaction_input.csv"), stringsAsFactors = FALSE)
proteins <- read.csv(paste0(cellphonedb_2_path, "/protein_input.csv"), stringsAsFactors = FALSE)
cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz"
cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz")
download.file(url = cellphone_url, destfile = cellphone_tar)
cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4")
untar(tarfile = cellphone_tar, exdir = cellphone_dir)
cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data")

interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE)
complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE)
genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE)
proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE)

# Make RL Map
rl_map <- create_rl_map_cellphonedb(
Expand Down Expand Up @@ -84,5 +113,5 @@ pbmc_dom <- build_domino(
min_rec_percentage = 0.1 # Minimum percent of cells that must express receptor
)

# Save
usethis::use_data(pbmc_dom, compress = "xz", overwrite = TRUE)
# Save domino object for generating test data
saveRDS(pbmc_dom, "inst/extdata/pbmc_domino_built.rds")
157 changes: 93 additions & 64 deletions data-raw/testdata.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,67 @@
# generate objects for comparison in testing scripts

library(Seurat)
library(domino2, lib = "../domino_libraries/libraries/v0_2_1/")

# code to prepare `pbmc3k` dataset
pbmc.data <- Seurat::Read10X(data.dir = "../inst/extdata/pbmc3k_filtered_gene_bc_matrices/")
pbmc <- Seurat::CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- Seurat::NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- Seurat::FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- Seurat::ScaleData(pbmc, features = rownames(pbmc))
pbmc <- Seurat::RunPCA(pbmc, features = Seurat::VariableFeatures(object = pbmc), verbose = FALSE)
pbmc <- Seurat::FindNeighbors(pbmc, dims = 1:10)
pbmc <- Seurat::FindClusters(pbmc, resolution = 0.5)
pbmc <- Seurat::RunUMAP(pbmc, dims = 1:10)
# Annotate clusters with cell phenotypes
cell_dict <- data.frame(cluster = c(0:8), cell_type = c("naive_CD4_T_cell", "CD14_monocyte", "memory_CD4_T_cell", "B_cell", "CD8_T_cell", "CD16_monocyte", "NK_cell", "dendritic_cell", "platelet"))
pbmc$cell_type <- plyr::mapvalues(pbmc$seurat_clusters, from = cell_dict$cluster, to = cell_dict$cell_type)
# load data for generation of test results from zenodo repository
# Zenodo host of outputs from SCENIC analysis
data_url <- "https://zenodo.org/records/10222767/files"
temp_dir <- tempdir()

pbmc_dir <- paste0(temp_dir, "/pbmc")
if (!dir.exists(pbmc_dir)) {
dir.create(pbmc_dir)
}
# Seurat object of preprocessed PBMC3K data
download.file(url = paste0(data_url, "/pbmc_seurat.rds"),
destfile = paste0(pbmc_dir, "/pbmc_seurat.rds"))
pbmc <- readRDS(paste0(pbmc_dir, "/pbmc_seurat.rds"))

# SCENIC input files
scenic_dir <- paste0(temp_dir, "/scenic")
if (!dir.exists(scenic_dir)) {
dir.create(scenic_dir)
}
download.file(url = paste0(data_url, "/scenic_auc_pbmc_3k.csv"),
destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv"))
download.file(url = paste0(data_url, "/scenic_regulons_pbmc_3k.csv"),
destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"),
header = TRUE, row.names = 1,
stringsAsFactors = FALSE, sep = ",")
regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))

# CellPhoneDB Database
cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz"
cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz")
download.file(url = cellphone_url, destfile = cellphone_tar)
cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4")
untar(tarfile = cellphone_tar, exdir = cellphone_dir)
cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data")

interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE)
complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE)
genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE)
proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE)

# subset the pbmc data to fewer cells to meet package requirements
RNA_features <- c("FAS", "FASLG", "ITGAM", "ITGB2", "FCER2", "LCK", "CD8A", "CD8B", "C5", "C5AR1")
TF_features <- c("CLOCK", "TAF1", "BCL6")
name_features <- c("FAS", "FASLG", "integrin_aMb2_complex", "FCER2", "LCK", "CD8_receptor", "C5", "C5AR1")
cell_types_dwn <- c("CD8_T_cell", "NK_cell", "B_cell")
n <- 100
RNA_features <- c(
"TNF", "TNFSF13", "FASLG", "FAS",
"ITGAM", "ITGB2", "ITGAV", "ITGB3", "FCER2",
"IL7", "IL7R", "IL2RG",
"CD22", "PTPRC",
"IGF1", "IGF1R")
TF_features <- c("ZNF257", "ATF4", "RUNX1")
name_features <- c(
"TNF", "TNFSF13", "FASLG", "FAS",
"integrin_aMb2_complex", "integrin_aVb3_complex", "FCER2",
"IL7", "IL7_receptor",
"CD22", "PTPRC",
"IGF1", "IGF1R")
cell_types_dwn <- c("CD8_T_cell", "CD14_monocyte", "B_cell")
n <- 120
cell_list <- list()
set.seed(123)
for(i in seq_along(cell_types_dwn)){
cell <- cell_types_dwn[i]
cell_barcodes <- colnames(pbmc)[pbmc$cell_type == cell]
Expand All @@ -35,78 +73,69 @@ cluster_dwn <- factor(
pbmc$cell_type[barcodes_dwn],
levels = cell_types_dwn
)
clusters_test <- cluster_dwn
RNA_count_test <- pbmc@assays$RNA@counts[
clusters_tiny <- cluster_dwn
RNA_count_tiny <- pbmc@assays$RNA@counts[
rownames(pbmc@assays$RNA@counts) %in% RNA_features,
colnames(pbmc@assays$RNA@counts) %in% barcodes_dwn]
RNA_zscore_test <- pbmc@assays$RNA@scale.data[
RNA_zscore_tiny <- pbmc@assays$RNA@scale.data[
rownames(pbmc@assays$RNA@scale.data) %in% RNA_features,
colnames(pbmc@assays$RNA@scale.data) %in% barcodes_dwn]

# cellphoneDB database
cellphonedb_4_path <- "../inst/extdata/cellphoneDB_v4/"
complexes <- read.csv(paste0(cellphonedb_4_path, "/complex_input.csv"), stringsAsFactors = FALSE)
complexes_test <- complexes[complexes$complex_name %in% name_features,]
genes <- read.csv(paste0(cellphonedb_4_path, "/gene_input.csv"), stringsAsFactors = FALSE)
genes_test <- genes[genes$gene_name %in% RNA_features,]
proteins <- read.csv(paste0(cellphonedb_4_path, "/protein_input.csv"), stringsAsFactors = FALSE)
proteins_test <- proteins[proteins$uniprot %in% genes_test$uniprot,]
interactions <- read.csv(paste0(cellphonedb_4_path, "/interaction_input.csv"), stringsAsFactors = FALSE)
interactions_test <- interactions[
(interactions$partner_a %in% proteins_test$uniprot | interactions$partner_a %in% complexes_test$complex_name) &
(interactions$partner_b%in% proteins_test$uniprot | interactions$partner_b %in% complexes_test$complex_name),]

# read in and subset SCENIC inputs
auc <- read.table(
"../inst/extdata/scenic/auc_pbmc_3k.csv",
header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = ","
)
# subset CellPhoneDB inputs
complexes_tiny <- complexes[complexes$complex_name %in% name_features,]
genes_tiny <- genes[genes$gene_name %in% RNA_features,]
proteins_tiny <- proteins[proteins$uniprot %in% genes_tiny$uniprot,]
interactions_tiny <- interactions[
(interactions$partner_a %in% proteins_tiny$uniprot | interactions$partner_a %in% complexes_tiny$complex_name) &
(interactions$partner_b%in% proteins_tiny$uniprot | interactions$partner_b %in% complexes_tiny$complex_name),]


# subset SCENIC inputs
auc <- t(auc)
rownames(auc) <- gsub("\\.\\.\\.$", "", rownames(auc))
auc_test <- auc[TF_features, barcodes_dwn]
auc_tiny <- auc[TF_features, barcodes_dwn]

regulons <- read.csv("../inst/extdata/scenic/regulons_pbmc_3k.csv")
regulons <- regulons[-1:-2, ]
colnames(regulons) <- c("TF", "MotifID", "AUC", "NES", "MotifSimilarityQvalue", "OrthologousIdentity", "Annotation", "Context", "TargetGenes", "RankAtMax")
regulons_test <- regulons[regulons$TF %in% TF_features,]
regulons_tiny <- regulons[regulons$TF %in% TF_features,]

# Make rl_map
rl_map_test <- domino2::create_rl_map_cellphonedb(
genes = genes_test,
proteins = proteins_test,
interactions = interactions_test,
complexes = complexes_test
rl_map_tiny <- domino2::create_rl_map_cellphonedb(
genes = genes_tiny,
proteins = proteins_tiny,
interactions = interactions_tiny,
complexes = complexes_tiny
)

# Get regulon list
regulon_list_test <- domino2::create_regulon_list_scenic(
regulons = regulons_test
regulon_list_tiny <- domino2::create_regulon_list_scenic(
regulons = regulons_tiny
)

# Create test domino object
pbmc_dom_test <- create_domino(
rl_map = rl_map_test,
features = auc_test,
counts = RNA_count_test,
z_scores = RNA_zscore_test,
clusters = clusters_test,
tf_targets = regulon_list_test,
pbmc_dom_tiny <- create_domino(
rl_map = rl_map_tiny,
features = auc_tiny,
counts = RNA_count_tiny,
z_scores = RNA_zscore_tiny,
clusters = clusters_tiny,
tf_targets = regulon_list_tiny,
use_clusters = TRUE,
use_complexes = TRUE,
remove_rec_dropout = FALSE
)

# Create built domino object
pbmc_dom_built_test <- build_domino(
dom = pbmc_dom_test,
pbmc_dom_built_tiny <- build_domino(
dom = pbmc_dom_tiny,
min_tf_pval = .05,
max_tf_per_clust = Inf,
max_rec_per_tf = Inf,
rec_tf_cor_threshold = .15,
min_rec_percentage = 0.03
rec_tf_cor_threshold = .1,
min_rec_percentage = 0.01
)

# Save all test files to internal sysdata object
usethis::use_data(pbmc_dom_built_test, complexes_test, genes_test, proteins_test, interactions_test,
pbmc_dom_test, regulon_list_test, rl_map_test, regulons_test, clusters_test,
RNA_count_test, RNA_zscore_test, auc_test, internal = TRUE)
usethis::use_data(pbmc_dom_built_tiny, complexes_tiny, genes_tiny, proteins_tiny, interactions_tiny,
pbmc_dom_tiny, regulon_list_tiny, rl_map_tiny, regulons_tiny, clusters_tiny,
RNA_count_tiny, RNA_zscore_tiny, auc_tiny, internal = TRUE)
Binary file removed data/pbmc_dom.rda
Binary file not shown.
Loading

0 comments on commit a42e56d

Please sign in to comment.