From b89ecd2da65fbe7950d2eb28d7b40f6ac8757bd6 Mon Sep 17 00:00:00 2001 From: yingxinlin Date: Tue, 11 Jul 2023 13:48:29 -0400 Subject: [PATCH] remove dependencies on MatrixUtilis --- DESCRIPTION | 2 +- R/readMethods.R | 2 +- R/utils.R | 101 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 13 +++--- docs/TODO.html | 71 --------------------------------- docs/index.html | 7 ++++ docs/pkgdown.yml | 2 +- 7 files changed, 118 insertions(+), 80 deletions(-) delete mode 100644 docs/TODO.html diff --git a/DESCRIPTION b/DESCRIPTION index f9b1162..4e47abd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ Imports: maptools, entropy, data.table, - Matrix.utils + grr Suggests: BiocStyle, knitr, diff --git a/R/readMethods.R b/R/readMethods.R index a616ac4..eafa1fc 100644 --- a/R/readMethods.R +++ b/R/readMethods.R @@ -125,7 +125,7 @@ readBIDCell <- function(data_dir, if (sum(duplicated_cell_id) > 0) { warning(paste("There are", sum(duplicated_cell_id), "cells with duplicated cell id")) - data <- Matrix.utils::aggregate.Matrix(data, cell_outputs$cell_id) + data <- aggregate.Matrix(data, cell_outputs$cell_id) rownames(data) <- paste("Cell", rownames(data), sep = "_") meta <- meta[!duplicated_cell_id, ] rownames(meta) <- paste("Cell", meta$cell_id, sep = "_") diff --git a/R/utils.R b/R/utils.R index dbfac21..5f486bb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -43,4 +43,105 @@ subset <- function(spe, col_idx) { } +# From Matrix.utils::aggregate.Matrix() function, which has been archived on CRAN + +aggregate.Matrix <- function(x, groupings = NULL, form = NULL, fun = 'sum', ...) 
{ + if (!methods::is(x,'Matrix')) { + x <- Matrix::Matrix(as.matrix(x), sparse = TRUE) + } + if (fun=='count') { + x <- x != 0 + } + groupings2 <- groupings + if (!methods::is(groupings2,'data.frame')) { + groupings2 <- as(groupings2,'data.frame') + } + + groupings2 <- data.frame(lapply(groupings2, as.factor)) + groupings2 <- data.frame(interaction(groupings2,sep = '_')) + colnames(groupings2) <- 'A' + if(is.null(form)) { + form <- stats::as.formula('~0+.') + } + + form <- stats::as.formula(form) + mapping <- dMcast(groupings2,form) + colnames(mapping) <- substring(colnames(mapping),2) + result <- Matrix::t(mapping) %*% x + + + attr(result,'crosswalk') <- grr::extract(groupings,match(rownames(result),groupings2$A)) + return(result) +} + +# From Matrix.utils::dMcast() function, which has been archived on CRAN + +dMcast <- function(data, formula, fun.aggregate='sum', + value.var=NULL, as.factors=FALSE, + factor.nas=TRUE, drop.unused.levels=TRUE) { + values <- 1 + if( !is.null(value.var)) { + values<-data[,value.var] + } + + alltms <- stats::terms(formula, data = data) + response <- rownames(attr(alltms,'factors'))[attr(alltms,'response')] + tm <- attr(alltms,"term.labels") + interactionsIndex <- grep(':',tm) + interactions <- tm[interactionsIndex] + simple <- setdiff(tm,interactions) + i2 <- strsplit(interactions,':') + newterms <- unlist(lapply(i2, function (x) { + paste("paste(", paste(x,collapse=','),",","sep='_'",")") + })) + newterms <- c(simple,newterms) + newformula <- stats::as.formula(paste('~0+',paste(newterms,collapse='+'))) + allvars <- all.vars(alltms) + data <- data[,c(allvars),drop=FALSE] + if (as.factors) { + data <- data.frame(lapply(data,as.factor)) + } + + characters <- unlist(lapply(data,is.character)) + data[,characters] <- lapply(data[,characters,drop = FALSE], as.factor) + factors <- unlist(lapply(data, is.factor)) + #Prevents errors with 1 or fewer distinct levels + data[, factors] <- lapply(data[, factors, drop = FALSE], function (x) { + 
if(factor.nas) { + if(any(is.na(x))) { + levels(x) <- c(levels(x),'NA') + x[is.na(x)] <- 'NA' + } + } + + if (drop.unused.levels) { + if(nlevels(x) != length(stats::na.omit(unique(x)))) { + x <- factor(as.character(x)) + } + } + + + y <- stats::contrasts(x, contrasts = FALSE, sparse = TRUE) + attr(x,'contrasts') <- y + return(x) + }) + #Allows NAs to pass + attr(data,'na.action') <- stats::na.pass + result <- Matrix::sparse.model.matrix(newformula, data, drop.unused.levels = FALSE, row.names=FALSE) + brokenNames <- grep('paste(',colnames(result),fixed = TRUE) + colnames(result)[brokenNames] <- lapply(colnames(result)[brokenNames], function (x) { + x <- gsub('paste(',replacement = '', x = x, fixed = TRUE) + x <- gsub(pattern = ', ',replacement='_', x = x, fixed = TRUE) + x <- gsub(pattern = '_sep = \"_\")', replacement = '', x=x, fixed = TRUE) + return(x) + }) + + result <- result*values + # if (isTRUE(response>0)) { + # responses = all.vars(terms(as.formula(paste(response, '~0')))) + # result <- aggregate.Matrix(result, data[,responses,drop=FALSE], + # fun = fun.aggregate) + # } + return(result) +} diff --git a/README.md b/README.md index 1a557bd..19bd1f3 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,13 @@ R package `CellSPA` can be installed as follows. BiocManager::install("SydneyBioX/CellSPA") ``` -Additional installations may be needed: -```{r} -install.packages("https://cran.r-project.org/src/contrib/Archive/Matrix.utils/Matrix.utils_0.9.8.tar.gz", type = "source", repos = NULL) -install.packages("grr") -``` +Example data files for XeniumBreastCancer1 dataset may be found here: https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing. Please place under ``vignettes/data``. In this data directory, place the output .csv files from BIDCell (with format ``cell_outputs_{number}.csv``, and exclude ``cell_expr.csv``) into ``BIDCell_csv_output``. 
Place the segmentation .tif file (e.g., ``epoch_1_step_4000_connected_v3.tif``) in the data directory as well. + +## Citation + +If CellSPA has assisted you with your work, please kindly cite our paper: + +Fu, X., Lin, Y., Lin, D., Mechtersheimer, D., Wang, C., Ameen, F., Ghazanfar, S., Patrick, E., Kim, J., & Yang, J. Y. H. (2023). Biologically-informed self-supervised learning for segmentation of subcellular spatial transcriptomics data. bioRxiv, 2023.06.13.544733. https://doi.org/10.1101/2023.06.13.544733 -Example data files for XeniumBreastCancer1 dataset may be found here: https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing. Please place under ``vignettes/data``. In this data directory, place the output .csv files from BIDCell (with format ``cell_outputs_{number}.csv``, and exclude ``cell_expr.csv``) into ``BIDCell_csv_output``. Place the segmentation .tif file (e.g, ``epoch_1_step_4000_connected_v3.tif``) in the data directory as well. diff --git a/docs/TODO.html b/docs/TODO.html deleted file mode 100644 index 53a33c7..0000000 --- a/docs/TODO.html +++ /dev/null @@ -1,71 +0,0 @@ - -NA • CellSPA - - -
-
- - - -
-
- - -

TODO: 1. verbose for some functions 2. read vizgen 3. read nanostring 5. visualisation comparing two metrics 6. For purity, check column name in spe. 7. For NN, check coord has rownames 8. Write import properly 9. Write functions to get output from CellSPA easily 10. Summary matrix 12. Add try for all cell shape metrics 13. Add filtering by tiff

- -
- - - -
- - - -
- -
-

Site built with pkgdown 2.0.6.

-
- -
- - - - - - - - diff --git a/docs/index.html b/docs/index.html index 4a330e9..09797e9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -67,6 +67,13 @@

InstallationBiocManager::install("SydneyBioX/CellSPA") +

Example data files for XeniumBreastCancer1 dataset may be found here: https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing. Please place under vignettes/data. In this data directory, place the output .csv files from BIDCell (with format cell_outputs_{number}.csv, and exclude cell_expr.csv) into BIDCell_csv_output. Place the segmentation .tif file (e.g., epoch_1_step_4000_connected_v3.tif) in the data directory as well.

+ +
+

Citation +

+

If CellSPA has assisted you with your work, please kindly cite our paper:

+

Fu, X., Lin, Y., Lin, D., Mechtersheimer, D., Wang, C., Ameen, F., Ghazanfar, S., Patrick, E., Kim, J., & Yang, J. Y. H. (2023). Biologically-informed self-supervised learning for segmentation of subcellular spatial transcriptomics data. bioRxiv, 2023.06.13.544733. https://doi.org/10.1101/2023.06.13.544733

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 20eba93..ea9393d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,5 +3,5 @@ pkgdown: 2.0.6 pkgdown_sha: ~ articles: CellSPA: CellSPA.html -last_built: 2023-05-12T02:08Z +last_built: 2023-07-11T16:46Z