Merge branch 'v0.2.2_development' into krishnan_getters

FertigLab · Dec 7, 2023 · a42e56d · a42e56d
2 parents e0830e6 + 22022c9
commit a42e56d
Show file tree

Hide file tree

Showing 24 changed files with 291 additions and 3,885,922 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -38,11 +38,11 @@ biocViews:
     Network
 Suggests:
     knitr,
-    loomR,
     patchwork,
     rmarkdown,
     Seurat,
-    testthat
+    testthat,
+    formatR
 VignetteBuilder: knitr
 Language: en-US
 Roxygen: list(markdown = TRUE)
diff --git a/R/data.R b/R/data.R
diff --git a/R/plot_fxns.R b/R/plot_fxns.R
@@ -665,7 +665,7 @@ cor_scatter <- function(dom, tf, rec, remove_rec_dropout = TRUE, ...) {
   }
   dat <- data.frame(rec = rec_z_scores, tf = tar_tf_scores)
   ggscatter(dat, x = "rec", y = "tf", add = "reg.line", conf.int = FALSE, cor.coef = FALSE,
-    cor.method = "pearson", xlab = rec, ylab = tf, size = 0.25)
+    cor.method = "pearson", xlab = rec, ylab = tf, size = 0.25, ...)
 }
 #' Plot expression of a receptor's ligands by other cell types as a chord plot
 #' 

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+[![R build status](https://github.com/FertigLab/domino2/workflows/r-build-check/badge.svg)](https://github.com/FertigLab/domino2/actions?workflow=r-build-check)
+
 ## Domino2: Inferring Cell Signaling from Single Cell RNA Sequencing Data
 
 Domino2 is an updated version of the original [domino](https://github.com/Elisseeff-Lab/domino) R package published in Nature Biomedical Engineering in [Computational reconstruction of the signalling networks surrounding implanted biomaterials from single-cell transcriptomics](https://doi.org/10.1038/s41551-021-00770-5). Domino2 is a tool for analysis of intra- and intercellular signaling in single cell RNA sequencing data based on transcription factor activation and receptor and ligand linkages.

diff --git a/data-raw/example_data.R b/data-raw/example_data.R
@@ -1,7 +1,20 @@
 # Code for preparing example objects for use in vignettes
 # Accessible to users for exploration as well
 library(Seurat)
-pbmc.data <- Read10X(data.dir = "../inst/extdata/pbmc3k_filtered_gene_bc_matrices")
+library(domino2)
+
+# Zenodo host of outputs from SCENIC analysis
+data_url <- "https://zenodo.org/records/10222767/files"
+temp_dir <- tempdir()
+
+# install 10X Genomics PBMC3K data
+pbmc_url <- "https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"
+pbmc_tar <- paste0(temp_dir, "/pbmc3k_filtered_gene_bc_matrices.tar.gz")
+download.file(url = pbmc_url, destfile = pbmc_tar)
+pbmc_dir <- paste0(temp_dir, "/pbmc3k_filtered_gene_bc_matrices")
+untar(tarfile = pbmc_tar, exdir = pbmc_dir)
+pbmc.data <- Read10X(data.dir = paste0(pbmc_dir, "/filtered_gene_bc_matrices/hg19"))
+
 pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
 pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
 pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
@@ -23,12 +36,22 @@ pbmc$cell_type <-
     to = cell_dict$cell_type
     )
 
+# Save Seurat object for generating test data
+saveRDS(pbmc, "inst/extdata/pbmc_seurat.rds")
+
 # Load SCENIC outputs
-scenic_dir = "../inst/extdata/scenic/"
-regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
+scenic_dir <- paste0(temp_dir, "/scenic")
+if (!dir.exists(scenic_dir)) {
+  dir.create(scenic_dir)
+}
+download.file(url = paste0(data_url, "/scenic_auc_pbmc_3k.csv"),
+              destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv"))
+download.file(url = paste0(data_url, "/scenic_regulons_pbmc_3k.csv"),
+              destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
 auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"),
                 header = TRUE, row.names = 1,
                 stringsAsFactors = FALSE, sep = ",")
+regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
 
 # Create list of SCENIC regulons
 regulons <- regulons[-1:-2,]
@@ -41,11 +64,17 @@ auc_in <- as.data.frame(t(auc))
 rownames(auc_in) <- gsub("\\.\\.\\.$", "", rownames(auc_in))
 
 # Load CellPhoneDB Database
-cellphonedb_2_path <- "../inst/extdata/cellphoneDB_v4"
-complexes <- read.csv(paste0(cellphonedb_2_path, "/complex_input.csv"), stringsAsFactors = FALSE)
-genes <- read.csv(paste0(cellphonedb_2_path, "/gene_input.csv"), stringsAsFactors = FALSE)
-interactions <- read.csv(paste0(cellphonedb_2_path, "/interaction_input.csv"), stringsAsFactors = FALSE)
-proteins <- read.csv(paste0(cellphonedb_2_path, "/protein_input.csv"), stringsAsFactors = FALSE)
+cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz"
+cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz")
+download.file(url = cellphone_url, destfile = cellphone_tar)
+cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4")
+untar(tarfile = cellphone_tar, exdir = cellphone_dir)
+cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data")
+
+interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE)
+complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE)
+genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE)
+proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE)
 
 # Make RL Map
 rl_map <- create_rl_map_cellphonedb(
@@ -84,5 +113,5 @@ pbmc_dom <- build_domino(
     min_rec_percentage = 0.1 # Minimum percent of cells that must express receptor
 )
 
-# Save
-usethis::use_data(pbmc_dom, compress = "xz", overwrite = TRUE)
+# Save domino object for generating test data
+saveRDS(pbmc_dom, "inst/extdata/pbmc_domino_built.rds")
diff --git a/data-raw/testdata.R b/data-raw/testdata.R
@@ -1,29 +1,67 @@
 # generate objects for comparison in testing scripts
 
+library(Seurat)
 library(domino2, lib = "../domino_libraries/libraries/v0_2_1/")
 
-# code to prepare `pbmc3k` dataset
-pbmc.data <- Seurat::Read10X(data.dir = "../inst/extdata/pbmc3k_filtered_gene_bc_matrices/")
-pbmc <- Seurat::CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
-pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
-pbmc <- Seurat::NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
-pbmc <- Seurat::FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
-pbmc <- Seurat::ScaleData(pbmc, features = rownames(pbmc))
-pbmc <- Seurat::RunPCA(pbmc, features = Seurat::VariableFeatures(object = pbmc), verbose = FALSE)
-pbmc <- Seurat::FindNeighbors(pbmc, dims = 1:10)
-pbmc <- Seurat::FindClusters(pbmc, resolution = 0.5)
-pbmc <- Seurat::RunUMAP(pbmc, dims = 1:10)
-# Annotate clusters with cell phenotypes
-cell_dict <- data.frame(cluster = c(0:8), cell_type = c("naive_CD4_T_cell", "CD14_monocyte", "memory_CD4_T_cell", "B_cell", "CD8_T_cell", "CD16_monocyte", "NK_cell", "dendritic_cell", "platelet"))
-pbmc$cell_type <- plyr::mapvalues(pbmc$seurat_clusters, from = cell_dict$cluster, to = cell_dict$cell_type)
+# load data for generation of test results from zenodo repository
+# Zenodo host of outputs from SCENIC analysis
+data_url <- "https://zenodo.org/records/10222767/files"
+temp_dir <- tempdir()
+
+pbmc_dir <- paste0(temp_dir, "/pbmc")
+if (!dir.exists(pbmc_dir)) {
+  dir.create(pbmc_dir)
+}
+# Seurat object of preprocessed PBMC3K data
+download.file(url = paste0(data_url, "/pbmc_seurat.rds"),
+              destfile = paste0(pbmc_dir, "/pbmc_seurat.rds"))
+pbmc <- readRDS(paste0(pbmc_dir, "/pbmc_seurat.rds"))
+
+# SCENIC input files
+scenic_dir <- paste0(temp_dir, "/scenic")
+if (!dir.exists(scenic_dir)) {
+  dir.create(scenic_dir)
+}
+download.file(url = paste0(data_url, "/scenic_auc_pbmc_3k.csv"),
+              destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv"))
+download.file(url = paste0(data_url, "/scenic_regulons_pbmc_3k.csv"),
+              destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
+auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"),
+                  header = TRUE, row.names = 1,
+                  stringsAsFactors = FALSE, sep = ",")
+regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv"))
+
+# CellPhoneDB Database
+cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz"
+cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz")
+download.file(url = cellphone_url, destfile = cellphone_tar)
+cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4")
+untar(tarfile = cellphone_tar, exdir = cellphone_dir)
+cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data")
+
+interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE)
+complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE)
+genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE)
+proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE)
 
 # subset the pbmc data to fewer cells to meet package requirements
-RNA_features <- c("FAS", "FASLG", "ITGAM", "ITGB2", "FCER2", "LCK", "CD8A", "CD8B", "C5", "C5AR1")
-TF_features <- c("CLOCK", "TAF1", "BCL6")
-name_features <- c("FAS", "FASLG", "integrin_aMb2_complex", "FCER2", "LCK", "CD8_receptor", "C5", "C5AR1")
-cell_types_dwn <- c("CD8_T_cell", "NK_cell", "B_cell")
-n <- 100
+RNA_features <- c(
+  "TNF", "TNFSF13", "FASLG", "FAS", 
+  "ITGAM", "ITGB2", "ITGAV", "ITGB3", "FCER2", 
+  "IL7", "IL7R", "IL2RG",
+  "CD22", "PTPRC",
+  "IGF1", "IGF1R")
+TF_features <- c("ZNF257", "ATF4", "RUNX1")
+name_features <- c(
+  "TNF", "TNFSF13", "FASLG", "FAS", 
+  "integrin_aMb2_complex", "integrin_aVb3_complex", "FCER2", 
+  "IL7", "IL7_receptor", 
+  "CD22", "PTPRC",
+  "IGF1", "IGF1R")
+cell_types_dwn <- c("CD8_T_cell", "CD14_monocyte", "B_cell")
+n <- 120
 cell_list <- list()
+set.seed(123)
 for(i in seq_along(cell_types_dwn)){
   cell <- cell_types_dwn[i]
   cell_barcodes <- colnames(pbmc)[pbmc$cell_type == cell]
@@ -35,78 +73,69 @@ cluster_dwn <- factor(
     pbmc$cell_type[barcodes_dwn],
     levels = cell_types_dwn
 )
-clusters_test <- cluster_dwn
-RNA_count_test <- pbmc@assays$RNA@counts[
+clusters_tiny <- cluster_dwn
+RNA_count_tiny <- pbmc@assays$RNA@counts[
   rownames(pbmc@assays$RNA@counts) %in% RNA_features, 
   colnames(pbmc@assays$RNA@counts) %in% barcodes_dwn]
-RNA_zscore_test <- pbmc@assays$RNA@scale.data[
+RNA_zscore_tiny <- pbmc@assays$RNA@scale.data[
   rownames(pbmc@assays$RNA@scale.data) %in% RNA_features, 
   colnames(pbmc@assays$RNA@scale.data) %in% barcodes_dwn]
 
-# cellphoneDB database
-cellphonedb_4_path <- "../inst/extdata/cellphoneDB_v4/"
-complexes <- read.csv(paste0(cellphonedb_4_path, "/complex_input.csv"), stringsAsFactors = FALSE)
-complexes_test <- complexes[complexes$complex_name %in% name_features,]
-genes <- read.csv(paste0(cellphonedb_4_path, "/gene_input.csv"), stringsAsFactors = FALSE)
-genes_test <- genes[genes$gene_name %in% RNA_features,]
-proteins <- read.csv(paste0(cellphonedb_4_path, "/protein_input.csv"), stringsAsFactors = FALSE)
-proteins_test <- proteins[proteins$uniprot %in% genes_test$uniprot,]
-interactions <- read.csv(paste0(cellphonedb_4_path, "/interaction_input.csv"), stringsAsFactors = FALSE)
-interactions_test <- interactions[
-  (interactions$partner_a %in% proteins_test$uniprot | interactions$partner_a %in% complexes_test$complex_name) & 
-    (interactions$partner_b%in% proteins_test$uniprot | interactions$partner_b %in% complexes_test$complex_name),]
-
-# read in and subset SCENIC inputs
-auc <- read.table(
-  "../inst/extdata/scenic/auc_pbmc_3k.csv",
-  header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = ","
-)
+# subset CellPhoneDB inputs
+complexes_tiny <- complexes[complexes$complex_name %in% name_features,]
+genes_tiny <- genes[genes$gene_name %in% RNA_features,]
+proteins_tiny <- proteins[proteins$uniprot %in% genes_tiny$uniprot,]
+interactions_tiny <- interactions[
+  (interactions$partner_a %in% proteins_tiny$uniprot | interactions$partner_a %in% complexes_tiny$complex_name) & 
+    (interactions$partner_b%in% proteins_tiny$uniprot | interactions$partner_b %in% complexes_tiny$complex_name),]
+
+
+# subset SCENIC inputs
 auc <- t(auc)
 rownames(auc) <- gsub("\\.\\.\\.$", "", rownames(auc))
-auc_test <- auc[TF_features, barcodes_dwn]
+auc_tiny <- auc[TF_features, barcodes_dwn]
 
-regulons <- read.csv("../inst/extdata/scenic/regulons_pbmc_3k.csv")
 regulons <- regulons[-1:-2, ]
 colnames(regulons) <- c("TF", "MotifID", "AUC", "NES", "MotifSimilarityQvalue", "OrthologousIdentity", "Annotation", "Context", "TargetGenes", "RankAtMax")
-regulons_test <- regulons[regulons$TF %in% TF_features,]
+regulons_tiny <- regulons[regulons$TF %in% TF_features,]
 
 # Make rl_map
-rl_map_test <- domino2::create_rl_map_cellphonedb(
-  genes = genes_test,
-  proteins = proteins_test,
-  interactions = interactions_test,
-  complexes = complexes_test
+rl_map_tiny <- domino2::create_rl_map_cellphonedb(
+  genes = genes_tiny,
+  proteins = proteins_tiny,
+  interactions = interactions_tiny,
+  complexes = complexes_tiny
 )
 
 # Get regulon list
-regulon_list_test <- domino2::create_regulon_list_scenic(
-  regulons = regulons_test
+regulon_list_tiny <- domino2::create_regulon_list_scenic(
+  regulons = regulons_tiny
 )
 
 # Create test domino object
-pbmc_dom_test <- create_domino(
-  rl_map = rl_map_test,
-  features = auc_test,
-  counts = RNA_count_test,
-  z_scores = RNA_zscore_test,
-  clusters = clusters_test,
-  tf_targets = regulon_list_test,
+pbmc_dom_tiny <- create_domino(
+  rl_map = rl_map_tiny,
+  features = auc_tiny,
+  counts = RNA_count_tiny,
+  z_scores = RNA_zscore_tiny,
+  clusters = clusters_tiny,
+  tf_targets = regulon_list_tiny,
   use_clusters = TRUE,
   use_complexes = TRUE,
   remove_rec_dropout = FALSE
 )
 
 # Create built domino object
-pbmc_dom_built_test <- build_domino(
-  dom = pbmc_dom_test,
+pbmc_dom_built_tiny <- build_domino(
+  dom = pbmc_dom_tiny,
   min_tf_pval = .05,
   max_tf_per_clust = Inf,
   max_rec_per_tf = Inf,
-  rec_tf_cor_threshold = .15,
-  min_rec_percentage = 0.03 
+  rec_tf_cor_threshold = .1,
+  min_rec_percentage = 0.01
 )
 
 # Save all test files to internal sysdata object
-usethis::use_data(pbmc_dom_built_test, complexes_test, genes_test, proteins_test, interactions_test,
-    pbmc_dom_test, regulon_list_test, rl_map_test, regulons_test, clusters_test,
-    RNA_count_test, RNA_zscore_test, auc_test, internal = TRUE)
+usethis::use_data(pbmc_dom_built_tiny, complexes_tiny, genes_tiny, proteins_tiny, interactions_tiny,
+    pbmc_dom_tiny, regulon_list_tiny, rl_map_tiny, regulons_tiny, clusters_tiny,
+    RNA_count_tiny, RNA_zscore_tiny, auc_tiny, internal = TRUE)
diff --git a/data/pbmc_dom.rda b/data/pbmc_dom.rda