From b89ecd2da65fbe7950d2eb28d7b40f6ac8757bd6 Mon Sep 17 00:00:00 2001
From: yingxinlin <yingxinlindsay@gmail.com>
Date: Tue, 11 Jul 2023 13:48:29 -0400
Subject: [PATCH] remove dependencies on Matrix.utils

---
 DESCRIPTION      |   2 +-
 R/readMethods.R  |   2 +-
 R/utils.R        | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 README.md        |  13 +++---
 docs/TODO.html   |  71 ---------------------------------
 docs/index.html  |   7 ++++
 docs/pkgdown.yml |   2 +-
 7 files changed, 118 insertions(+), 80 deletions(-)
 delete mode 100644 docs/TODO.html

diff --git a/DESCRIPTION b/DESCRIPTION
index f9b1162..4e47abd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,7 +38,7 @@ Imports:
     maptools,
     entropy,
     data.table,
-    Matrix.utils
+    grr
 Suggests: 
     BiocStyle,
     knitr,
diff --git a/R/readMethods.R b/R/readMethods.R
index a616ac4..eafa1fc 100644
--- a/R/readMethods.R
+++ b/R/readMethods.R
@@ -125,7 +125,7 @@ readBIDCell <- function(data_dir,
     if (sum(duplicated_cell_id) > 0) {
         warning(paste("There are", sum(duplicated_cell_id),
                       "cells with duplicated cell id"))
-        data <- Matrix.utils::aggregate.Matrix(data, cell_outputs$cell_id)
+        data <- aggregate.Matrix(data, cell_outputs$cell_id)
         rownames(data) <- paste("Cell", rownames(data), sep = "_")
         meta <- meta[!duplicated_cell_id, ]
         rownames(meta) <- paste("Cell", meta$cell_id, sep = "_")
diff --git a/R/utils.R b/R/utils.R
index dbfac21..5f486bb 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -43,4 +43,105 @@ subset <- function(spe, col_idx) {
 }
 
 
+# Port of Matrix.utils::aggregate.Matrix() (archived on CRAN): combines the rows of `x` that share
+# a `groupings` key. `fun` is 'sum' or 'count' (non-zero entries); other values are rejected, not summed.
+aggregate.Matrix <- function(x, groupings = NULL, form = NULL, fun = 'sum', ...) {
+    if (!isTRUE(fun %in% c('sum', 'count'))) {
+        stop("`fun` must be 'sum' or 'count'", call. = FALSE)
+    }
+    if (!methods::is(x,'Matrix')) {
+        x <- Matrix::Matrix(as.matrix(x), sparse = TRUE)
+    }
+    if (fun=='count') {
+        x <- x != 0
+    }
+    groupings2 <- groupings
+    if (!methods::is(groupings2,'data.frame')) {
+        groupings2 <- methods::as(groupings2,'data.frame')
+    }
+    # Fuse all grouping columns into a single factor column 'A' that keys the output rows.
+    groupings2 <- data.frame(lapply(groupings2, as.factor))
+    groupings2 <- data.frame(interaction(groupings2,sep = '_'))
+    colnames(groupings2) <- 'A'
+    if(is.null(form)) {
+        form <- stats::as.formula('~0+.')
+    }
+    form <- stats::as.formula(form)
+    mapping <- dMcast(groupings2,form)
+    colnames(mapping) <- substring(colnames(mapping),2)
+    result <- Matrix::t(mapping) %*% x
+    attr(result,'crosswalk') <- grr::extract(groupings,match(rownames(result),groupings2$A))
+    return(result)
+}
+
+# Port of Matrix.utils::dMcast() (archived on CRAN): sparse model matrix of `formula`'s terms over
+# `data`, scaled by `value.var` if given. `fun.aggregate` is only used by the commented-out tail.
+dMcast <- function(data, formula, fun.aggregate='sum',
+                   value.var=NULL, as.factors=FALSE,
+                   factor.nas=TRUE, drop.unused.levels=TRUE) {
+    values <- 1
+    if( !is.null(value.var)) {
+        values<-data[,value.var]
+    }
+    # Parse the formula; every interaction term a:b is rewritten below as paste(a, b, sep='_').
+    alltms <- stats::terms(formula, data = data)
+    response <- rownames(attr(alltms,'factors'))[attr(alltms,'response')]
+    tm <- attr(alltms,"term.labels")
+    interactionsIndex <- grep(':',tm)
+    interactions <- tm[interactionsIndex]
+    simple <- setdiff(tm,interactions)
+    i2 <- strsplit(interactions,':')
+    newterms <- unlist(lapply(i2, function (x) {
+        paste("paste(", paste(x,collapse=','),",","sep='_'",")")
+    }))
+    newterms <- c(simple,newterms)
+    newformula <- stats::as.formula(paste('~0+',paste(newterms,collapse='+')))
+    allvars <- all.vars(alltms)
+    data <- data[,c(allvars),drop=FALSE]
+    if (as.factors) {
+        data <- data.frame(lapply(data,as.factor))
+    }
+    # Character columns are converted to factors before the per-factor clean-up below.
+    characters <- unlist(lapply(data,is.character))
+    data[,characters] <- lapply(data[,characters,drop = FALSE], as.factor)
+    factors <- unlist(lapply(data, is.factor))
+    # Prevents errors with 1 or fewer distinct levels
+    data[, factors] <- lapply(data[, factors, drop = FALSE], function (x) {
+        if(factor.nas) {
+            if(any(is.na(x))) {
+                levels(x) <- c(levels(x),'NA')
+                x[is.na(x)] <- 'NA'
+            }
+        }
+        # Re-create the factor when it carries levels that do not occur in the data.
+        if (drop.unused.levels) {
+            if(nlevels(x) != length(stats::na.omit(unique(x)))) {
+                x <- factor(as.character(x))
+            }
+        }
+        # Store sparse, un-contrasted (contrasts = FALSE) coding on the factor for the model matrix below.
+
+        y <- stats::contrasts(x, contrasts = FALSE, sparse = TRUE)
+        attr(x,'contrasts') <- y
+        return(x)
+    })
+    # Allows NAs to pass
+    attr(data,'na.action') <- stats::na.pass
+    result <- Matrix::sparse.model.matrix(newformula, data, drop.unused.levels = FALSE, row.names=FALSE)
+    brokenNames <- grep('paste(',colnames(result),fixed = TRUE)
+    colnames(result)[brokenNames] <- lapply(colnames(result)[brokenNames], function (x) {
+        x <- gsub('paste(',replacement = '', x = x, fixed = TRUE)
+        x <- gsub(pattern = ', ',replacement='_', x = x, fixed = TRUE)
+        x <- gsub(pattern = '_sep = \"_\")', replacement = '', x=x, fixed = TRUE)
+        return(x)
+    })
+    # Multiply every entry by `values` (1 unless `value.var` selects a column of `data`).
+    result <- result*values
+    # if (isTRUE(response>0)) {
+    #     responses = all.vars(terms(as.formula(paste(response, '~0'))))
+    #     result <- aggregate.Matrix(result, data[,responses,drop=FALSE],
+    #                                fun = fun.aggregate)
+    # }
+    return(result)
+}
 
diff --git a/README.md b/README.md
index 1a557bd..19bd1f3 100644
--- a/README.md
+++ b/README.md
@@ -12,12 +12,13 @@ R package `CellSPA` can be installed as follows.
 BiocManager::install("SydneyBioX/CellSPA")
 ```
 
-Additional installations may be needed:
 
-```{r}
-install.packages("https://cran.r-project.org/src/contrib/Archive/Matrix.utils/Matrix.utils_0.9.8.tar.gz", type = "source", repos = NULL)
-install.packages("grr")
-```
+Example data files for the XeniumBreastCancer1 dataset may be found here: https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing. Please place them under ``vignettes/data``. In this data directory, place the output .csv files from BIDCell (with format ``cell_outputs_{number}.csv``, and exclude ``cell_expr.csv``) into ``BIDCell_csv_output``. Place the segmentation .tif file (e.g., ``epoch_1_step_4000_connected_v3.tif``) in the data directory as well.
+
+## Citation
+
+If CellSPA has assisted you with your work, please kindly cite our paper:
+
+Fu, X., Lin, Y., Lin, D., Mechtersheimer, D., Wang, C., Ameen, F., Ghazanfar, S., Patrick, E., Kim, J., & Yang, J. Y. H. (2023). Biologically-informed self-supervised learning for segmentation of subcellular spatial transcriptomics data. bioRxiv, 2023.06.13.544733. https://doi.org/10.1101/2023.06.13.544733
 
 
-Example data files for XeniumBreastCancer1 dataset may be found here: https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing. Please place under ``vignettes/data``. In this data directory, place the output .csv files from BIDCell (with format ``cell_outputs_{number}.csv``, and exclude ``cell_expr.csv``) into ``BIDCell_csv_output``. Place the segmentation .tif file (e.g, ``epoch_1_step_4000_connected_v3.tif``) in the data directory as well.
diff --git a/docs/TODO.html b/docs/TODO.html
deleted file mode 100644
index 53a33c7..0000000
--- a/docs/TODO.html
+++ /dev/null
@@ -1,71 +0,0 @@
-<!DOCTYPE html>
-<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>NA • CellSPA</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css"><script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" 
integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet"><script src="pkgdown.js"></script><meta property="og:title" content="NA"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
-<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
-<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
-<![endif]--></head><body data-spy="scroll" data-target="#toc">
-    
-
-    <div class="container template-title-body">
-      <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
-  <div class="container">
-    <div class="navbar-header">
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-      <span class="navbar-brand">
-        <a class="navbar-link" href="index.html">CellSPA</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.1.0</span>
-      </span>
-    </div>
-
-    <div id="navbar" class="navbar-collapse collapse">
-      <ul class="nav navbar-nav"><li>
-  <a href="articles/CellSPA.html">Get started</a>
-</li>
-<li>
-  <a href="reference/index.html">Reference</a>
-</li>
-      </ul><ul class="nav navbar-nav navbar-right"></ul></div><!--/.nav-collapse -->
-  </div><!--/.container -->
-</div><!--/.navbar -->
-
-      
-
-      </header><div class="row">
-  <div class="contents col-md-9">
-    <div class="page-header">
-      <h1>NA</h1>
-    </div>
-
-<p>TODO: 1. verbose for some functions 2. read vizgen 3. read nanostring 5. visualisation comparing two metrics 6. For purity, check column name in spe. 7. For NN, check coord has rownames 8. Write import properly 9. Write functions to get output from CellSPA easily 10. Summary matrix 12. Add try for all cell shape metrics 13. Add filtering by tiff</p>
-
-  </div>
-
-  <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
-    <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
-    </nav></div>
-
-</div>
-
-
-
-      <footer><div class="copyright">
-  <p></p><p>Developed by Yingxin Lin, Daniel Mechtersheimer.</p>
-</div>
-
-<div class="pkgdown">
-  <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.</p>
-</div>
-
-      </footer></div>
-
-  
-
-
-  
-
-  </body></html>
-
diff --git a/docs/index.html b/docs/index.html
index 4a330e9..09797e9 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -67,6 +67,13 @@ <h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#i
 </h2>
 <p>R package <code>CellSPA</code> can be installed as follows.</p>
 <pre class="{r}"><code><span><span class="fu">BiocManager</span><span class="fu">::</span><span class="fu"><a href="https://rdrr.io/pkg/BiocManager/man/install.html" class="external-link">install</a></span><span class="op">(</span><span class="st">"SydneyBioX/CellSPA"</span><span class="op">)</span></span></code></pre>
+<p>Example data files for the XeniumBreastCancer1 dataset may be found here: <a href="https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing" class="external-link uri">https://drive.google.com/drive/folders/1hvNFDFteLp_S7qwDJgucLlLETL3f0rTs?usp=sharing</a>. Please place them under <code>vignettes/data</code>. In this data directory, place the output .csv files from BIDCell (with format <code>cell_outputs_{number}.csv</code>, and exclude <code>cell_expr.csv</code>) into <code>BIDCell_csv_output</code>. Place the segmentation .tif file (e.g., <code>epoch_1_step_4000_connected_v3.tif</code>) in the data directory as well.</p>
+</div>
+<div class="section level2">
+<h2 id="citation">Citation<a class="anchor" aria-label="anchor" href="#citation"></a>
+</h2>
+<p>If CellSPA has assisted you with your work, please kindly cite our paper:</p>
+<p>Fu, X., Lin, Y., Lin, D., Mechtersheimer, D., Wang, C., Ameen, F., Ghazanfar, S., Patrick, E., Kim, J., &amp; Yang, J. Y. H. (2023). Biologically-informed self-supervised learning for segmentation of subcellular spatial transcriptomics data. bioRxiv, 2023.06.13.544733. <a href="https://doi.org/10.1101/2023.06.13.544733" class="external-link uri">https://doi.org/10.1101/2023.06.13.544733</a></p>
 </div>
 </div>
   </div>
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 20eba93..ea9393d 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -3,5 +3,5 @@ pkgdown: 2.0.6
 pkgdown_sha: ~
 articles:
   CellSPA: CellSPA.html
-last_built: 2023-05-12T02:08Z
+last_built: 2023-07-11T16:46Z