Changes from Cole Trapnell #110
```diff
@@ -64,17 +64,18 @@ PLNfamily <-
   ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   ## Post treatment --------------------
   #' @description Update fields after optimization
-  #' @param config a list for controlling the post-treatment.
-  postTreatment = function(config) {
-    nullModel <- nullModelPoisson(self$responses, self$covariates, self$offsets, self$weights)
+  #' @param config_post a list for controlling the post-treatment.
+  postTreatment = function(config_post, config_optim) {
+    #nullModel <- nullModelPoisson(self$responses, self$covariates, self$offsets, self$weights)
     for (model in self$models)
       model$postTreatment(
         self$responses,
         self$covariates,
         self$offsets,
         self$weights,
-        config,
-        nullModel = nullModel
+        config_post = config_post,
+        config_optim = config_optim,
+        nullModel = NULL
       )
   },
```

Review comment: No comparison to a null Poisson model in the general post-treatment of PLN families. Is it to improve speed, or because it is not required for the post-treatments?

Reply: In our applications, the call to glm.fit inside of nullModelPoisson() can sometimes throw an exception, which ruins a perfectly good PLN fit! Since it seemed to us that nullModelPoisson() was something one only needed to do when (optionally) computing the approximate R2, we thought this call was superfluous.
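To illustrate the reply, here is a minimal sketch of what deferring the null model to the R2 step could look like, assuming the package's existing nullModelPoisson() helper; approx_r2_safe and the tryCatch guard are hypothetical, not code from this PR:

```r
# Hypothetical sketch: fit the null Poisson model only when R2 is requested,
# and guard against the glm.fit exceptions mentioned in the reply.
approx_r2_safe <- function(responses, covariates, offsets, weights, nullModel = NULL) {
  if (is.null(nullModel)) {
    nullModel <- tryCatch(
      nullModelPoisson(responses, covariates, offsets, weights),
      error = function(e) {
        warning("null Poisson model failed, skipping R2: ", conditionMessage(e))
        NULL
      })
  }
  if (is.null(nullModel)) return(invisible(NULL))
  # ... the deviance-based R2 computation would follow here ...
}
```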
```diff
@@ -62,8 +62,9 @@ PLNfit <- R6Class(
     torch_elbo = function(data, params, index=torch_tensor(1:self$n)) {
       S2 <- torch_square(params$S[index])
       Z <- data$O[index] + params$M[index] + torch_mm(data$X[index], params$B)
+      A <- torch_exp(Z + .5 * S2)
       res <- .5 * sum(data$w[index]) * torch_logdet(private$torch_Sigma(data, params, index)) +
-        sum(data$w[index,NULL] * (torch_exp(Z + .5 * S2) - data$Y[index] * Z - .5 * torch_log(S2)))
+        sum(data$w[index,NULL] * (A - data$Y[index] * Z - .5 * torch_log(S2)))
       res
     },
```
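Transcribing the code above into math (up to additive constants), the quantity minimized on each mini-batch is the negative weighted ELBO, with $Z = O + M + XB$ and $A = \exp(Z + \tfrac{1}{2}S^2)$ taken elementwise:

$$\mathcal{J} \;=\; \frac{1}{2}\Big(\sum_i w_i\Big)\log\lvert\Sigma\rvert \;+\; \sum_{i,j} w_i\Big(A_{ij} - Y_{ij} Z_{ij} - \tfrac{1}{2}\log S_{ij}^2\Big).$$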
```diff
@@ -80,21 +81,27 @@
     torch_vloglik = function(data, params) {
       S2 <- torch_square(params$S)
-      Ji <- .5 * self$p - rowSums(.logfactorial(as.matrix(data$Y))) + as.numeric(
-        .5 * torch_logdet(params$Omega) +
-          torch_sum(data$Y * params$Z - params$A + .5 * torch_log(S2), dim = 2) -
-          .5 * torch_sum(torch_mm(params$M, params$Omega) * params$M + S2 * torch_diag(params$Omega), dim = 2)
-      )
-      attr(Ji, "weights") <- as.numeric(data$w)
+      Ji_tmp = .5 * torch_logdet(params$Omega) +
+        torch_sum(data$Y * params$Z - params$A + .5 * torch_log(S2), dim = 2) -
+        .5 * torch_sum(torch_mm(params$M, params$Omega) * params$M + S2 * torch_diag(params$Omega), dim = 2)
+      Ji_tmp = Ji_tmp$cpu()
+      Ji_tmp = as.numeric(Ji_tmp)
+      Ji <- .5 * self$p - rowSums(.logfactorial(as.matrix(data$Y$cpu()))) + Ji_tmp
+      attr(Ji, "weights") <- as.numeric(data$w$cpu())
       Ji
     },
```

Review comment (on the .logfactorial line): Why not use […]?

Reply: Good question. Perhaps it would be better to defer that so we can use .logfactorial_torch() instead.
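The reply mentions a .logfactorial_torch() helper; a minimal sketch of what such a device-native helper could be, using torch's lgamma (the name comes from the reply, the body is an assumption):

```r
# log(y!) = lgamma(y + 1), evaluated on whatever device y lives on,
# avoiding the data$Y$cpu() round trip used above.
.logfactorial_torch <- function(y) {
  torch::torch_lgamma(y + 1)
}
```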
```diff
     #' @import torch
     torch_optimize = function(data, params, config) {
+      #config$device = "mps"
+      if (config$trace > 1)
+        message (paste("optimizing with device: ", config$device))
       ## Conversion of data and parameters to torch tensors (pointers)
-      data <- lapply(data, torch_tensor) # list with Y, X, O, w
-      params <- lapply(params, torch_tensor, requires_grad = TRUE) # list with B, M, S
+      data <- lapply(data, torch_tensor, dtype = torch_float32(), device = config$device) # list with Y, X, O, w
+      params <- lapply(params, torch_tensor, dtype = torch_float32(), requires_grad = TRUE, device = config$device) # list with B, M, S

       ## Initialize optimizer
       optimizer <- switch(config$algorithm,
```
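For orientation, a hypothetical torch optimizer configuration built from the fields this diff reads (algorithm, device, trace, num_epoch, ftol_rel; num_batch is assumed to come from the same list), illustrative values only:

```r
# Hypothetical config_optim for the torch backend; field names as used above.
config_optim <- list(
  algorithm = "RPROP",  # one of "RPROP", "RMSPROP", "ADAM", "ADAGRAD"
  device    = "cuda",   # or "cpu", or "mps" on Apple silicon
  trace     = 2,        # > 1 prints the device and periodic progress
  num_epoch = 1000,     # maximum number of epochs
  num_batch = 1,        # number of mini-batches per epoch (assumed field)
  ftol_rel  = 1e-8      # relative tolerance on the objective
)
```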
```diff
@@ -111,11 +118,14 @@
       batch_size <- floor(self$n/num_batch)

       objective <- double(length = config$num_epoch + 1)
+      #B_old = optimizer$param_groups[[1]]$params$B$clone()
       for (iterate in 1:num_epoch) {
-        B_old <- as.numeric(optimizer$param_groups[[1]]$params$B)
+        #B_old <- as.numeric(optimizer$param_groups[[1]]$params$B)
         # rearrange the data each epoch
-        permute <- torch::torch_randperm(self$n) + 1L
+        #permute <- torch::torch_randperm(self$n, device = "cpu") + 1L
+        permute = torch::torch_tensor(sample.int(self$n), dtype = torch_long(), device=config$device)

+        #print (paste("num batches", num_batch))
         for (batch_idx in 1:num_batch) {
           # here index is a vector of the indices in the batch
           index <- permute[(batch_size*(batch_idx - 1) + 1):(batch_idx*batch_size)]
```
```diff
@@ -129,9 +139,7 @@
         ## assess convergence
         objective[iterate + 1] <- loss$item()
-        B_new <- as.numeric(optimizer$param_groups[[1]]$params$B)
         delta_f <- abs(objective[iterate] - objective[iterate + 1]) / abs(objective[iterate + 1])
-        delta_x <- sum(abs(B_old - B_new))/sum(abs(B_new))

         ## Error message if objective diverges
         if (!is.finite(loss$item())) {
```
```diff
@@ -140,13 +148,14 @@
         }

         ## display progress
-        if (config$trace > 1 && (iterate %% 50 == 0))
+        if (config$trace > 1 && (iterate %% 50 == 1))
           cat('\niteration: ', iterate, 'objective', objective[iterate + 1],
-              'delta_f' , round(delta_f, 6), 'delta_x', round(delta_x, 6))
+              'delta_f' , round(delta_f, 6))

         ## Check for convergence
+        #print (delta_f)
         if (delta_f < config$ftol_rel) status <- 3
-        if (delta_x < config$xtol_rel) status <- 4
+        #if (delta_x < config$xtol_rel) status <- 4
         if (status %in% c(3,4)) {
           objective <- objective[1:iterate + 1]
           break
```

Review comment: Is there a reason to remove the convergence check on the parameter values and to keep only the one on the ELBO value? This will speed up the algorithm (fewer conditions to satisfy) but may cause the nlopt and torch implementations to diverge in the results they produce (not a bad thing per se, but something we need to be aware of).

Reply: I defer to you on that - we have been generally happy with larger default values of ftol_rel, triggering convergence on status 3 most of the time.
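For reference, the two stopping criteria under discussion, as implemented in the code above (status 3 and status 4 respectively):

$$\delta_f = \frac{\lvert f^{(t-1)} - f^{(t)} \rvert}{\lvert f^{(t)} \rvert} < \texttt{ftol\_rel}, \qquad \delta_x = \frac{\sum_{jk} \lvert B^{(t-1)}_{jk} - B^{(t)}_{jk} \rvert}{\sum_{jk} \lvert B^{(t)}_{jk} \rvert} < \texttt{xtol\_rel},$$

where $f^{(t)}$ is the objective after epoch $t$; this PR comments out the $\delta_x$ test.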
```diff
@@ -158,7 +167,10 @@
       params$Z <- data$O + params$M + torch_matmul(data$X, params$B)
       params$A <- torch_exp(params$Z + torch_pow(params$S, 2)/2)

-      out <- lapply(params, as.matrix)
+      out <- lapply(params, function(x) {
+        x = x$cpu()
+        as.matrix(x)}
+      )
       out$Ji <- private$torch_vloglik(data, params)
       out$monitoring <- list(
         objective = objective,
```
```diff
@@ -174,7 +186,7 @@
       ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       ## PRIVATE METHODS FOR VARIANCE OF THE ESTIMATORS
       ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-      variance_variational = function(X) {
+      variance_variational = function(X, config = config_default_nlopt) {
         ## Variance of B for n data points
         fisher <- Matrix::bdiag(lapply(1:self$p, function(j) {
           crossprod(X, private$A[, j] * X) # t(X) %*% diag(A[, i]) %*% X
```
```diff
@@ -204,8 +216,56 @@
         invisible(list(var_B = var_B, var_Omega = var_Omega))
       },

+      compute_vcov_from_resamples = function(resamples){
+        # compute the covariance of the parameters
+        get_cov_mat = function(data, cell_group) {
+          cov_matrix = cov(data)
+          rownames(cov_matrix) = paste0(cell_group, "_", rownames(cov_matrix))
+          colnames(cov_matrix) = paste0(cell_group, "_", colnames(cov_matrix))
+          return(cov_matrix)
+        }
+
+        B_list = resamples %>% map("B")
+        #print (B_list)
+        vcov_B = lapply(seq(1, ncol(private$B)), function(B_col){
+          param_ests_for_col = B_list %>% map(~.x[, B_col])
+          param_ests_for_col = do.call(rbind, param_ests_for_col)
+          #print (param_ests_for_col)
+          row_vcov = cov(param_ests_for_col)
+        })
+        #print ("vcov blocks")
+        #print (vcov_B)
+
+        #B_vcov <- resamples %>% map("B") %>% map(~( . )) %>% reduce(cov)
+
+        #var_jack <- jacks %>% map("B") %>% map(~( (. - B_jack)^2)) %>% reduce(`+`) %>%
+        #  `dimnames<-`(dimnames(private$B))
+        #B_hat <- private$B[,] ## strips attributes while preserving names
+
+        vcov_B = Matrix::bdiag(vcov_B) %>% as.matrix()
+
+        rownames(vcov_B) <- colnames(vcov_B) <-
+          expand.grid(covariates = rownames(private$B),
+                      responses = colnames(private$B)) %>% rev() %>%
+          ## Hack to make sure that species is first and varies slowest
+          apply(1, paste0, collapse = "_")
+
+        #print (pheatmap::pheatmap(vcov_B, cluster_rows=FALSE, cluster_cols=FALSE))
+
+        #names = lapply(bootstrapped_df$cov_mat, function(m){ colnames(m)}) %>% unlist()
+        #rownames(bootstrapped_vhat) = names
+        #colnames(bootstrapped_vhat) = names
+
+        vcov_B = methods::as(vcov_B, "dgCMatrix")
+
+        return(vcov_B)
+      },
```

Review comment (placed on the get_cov_mat helper): I may be mistaken, but […]

Reply: Vestigial code from debugging - can be removed.
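To make the structure of the returned matrix concrete, here is a self-contained toy version of the per-column assembly above (fake data; purrr and Matrix as in the package):

```r
library(purrr)   # map()
library(Matrix)  # bdiag()

set.seed(1)
# 50 fake resampled estimates of a 2 x 3 coefficient matrix B
resamples <- replicate(50, list(B = matrix(rnorm(6), 2, 3)), simplify = FALSE)

# one covariance block per response (column of B), as in compute_vcov_from_resamples()
vcov_blocks <- lapply(1:3, function(j) {
  cov(do.call(rbind, map(resamples, ~ .x$B[, j])))
})

# block-diagonal assembly: covariances *across* responses are dropped by construction
vcov_B <- as.matrix(bdiag(vcov_blocks))  # a 6 x 6 matrix
```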
```diff
       variance_jackknife = function(Y, X, O, w, config = config_default_nlopt) {
-        jacks <- future.apply::future_lapply(seq_len(self$n), function(i) {
+        jacks <- lapply(seq_len(self$n), function(i) {
```

Review comment: I have no strong opinion on this one as @jchiquet was the one who wrote this part, but is there a reason to prefer lapply to future_lapply (one less dependency? simpler to use?). A nice thing about future_lapply is that it is backend-agnostic and can be used for several parallelization paradigms.

Reply: Ah, yes, agree it would be wonderful to keep this. The issue is that on machines that use OpenBLAS with a multithreaded backend, using future can deadlock the session. A workaround is to wrap calls to future with something like this:
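(A reconstruction of the snippet, assuming RhpcBLASctl's standard blas_get_num_procs()/blas_set_num_threads() API:)

```r
# Reconstruction (assumed): drop to single-threaded BLAS before using future,
# remembering the current thread count so it can be restored afterwards.
library(RhpcBLASctl)
old_procs <- blas_get_num_procs()
blas_set_num_threads(1)
```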
Then you do work with future and then:
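(Again a reconstruction, restoring the saved thread count:)

```r
# Reconstruction (assumed): restore the original BLAS thread count.
blas_set_num_threads(old_procs)
```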
We didn't add this because we didn't want to add a new dependency on RhpcBLASctl to the package, but you could, if you want to be able to do linear algebra inside of functions called by future.
```diff
           data <- list(Y = Y[-i, , drop = FALSE],
                        X = X[-i, , drop = FALSE],
                        O = O[-i, , drop = FALSE],
@@ -215,7 +275,7 @@
                        config = config)
           optim_out <- do.call(private$optimizer$main, args)
           optim_out[c("B", "Omega")]
-        }, future.seed = TRUE)
+        })

         B_jack <- jacks %>% map("B") %>% reduce(`+`) / self$n
         var_jack <- jacks %>% map("B") %>% map(~( (. - B_jack)^2)) %>% reduce(`+`) %>%
@@ -224,6 +284,9 @@
         attr(private$B, "bias") <- (self$n - 1) * (B_jack - B_hat)
         attr(private$B, "variance_jackknife") <- (self$n - 1) / self$n * var_jack

+        vcov_jacks = private$compute_vcov_from_resamples(jacks)
+        attr(private$B, "vcov_jackknife") <- vcov_jacks

         Omega_jack <- jacks %>% map("Omega") %>% reduce(`+`) / self$n
         var_jack <- jacks %>% map("Omega") %>% map(~( (. - Omega_jack)^2)) %>% reduce(`+`) %>%
           `dimnames<-`(dimnames(private$Omega))
```
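For reference, the stored quantities are the standard leave-one-out jackknife estimators, entrywise over $B$, matching the code above:

$$\bar{B} = \frac{1}{n}\sum_{i=1}^{n} \hat{B}_{(-i)}, \qquad \widehat{\mathrm{bias}} = (n-1)\big(\bar{B} - \hat{B}\big), \qquad \widehat{\mathrm{Var}}_{\mathrm{jack}} = \frac{n-1}{n}\sum_{i=1}^{n}\big(\hat{B}_{(-i)} - \bar{B}\big)^2.$$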
```diff
@@ -234,23 +297,37 @@
       variance_bootstrap = function(Y, X, O, w, n_resamples = 100, config = config_default_nlopt) {
         resamples <- replicate(n_resamples, sample.int(self$n, replace = TRUE), simplify = FALSE)
-        boots <- future.apply::future_lapply(resamples, function(resample) {
+        boots <- lapply(resamples, function(resample) {
           data <- list(Y = Y[resample, , drop = FALSE],
                        X = X[resample, , drop = FALSE],
                        O = O[resample, , drop = FALSE],
                        w = w[resample])
+          #print (config$torch_device)
+          #print (config)
+          if (config$algorithm %in% c("RPROP", "RMSPROP", "ADAM", "ADAGRAD")) # hack, to know if we're doing torch or not
+            data <- lapply(data, torch_tensor, device = config$device) # list with Y, X, O, w
+
+          #print (data$Y$device)

           args <- list(data = data,
                        params = list(B = private$B, M = matrix(0,self$n,self$p), S = private$S[resample, ]),
                        config = config)
+          if (config$algorithm %in% c("RPROP", "RMSPROP", "ADAM", "ADAGRAD")) # hack, to know if we're doing torch or not
+            args$params <- lapply(args$params, torch_tensor, requires_grad = TRUE, device = config$device) # list with B, M, S

           optim_out <- do.call(private$optimizer$main, args)
           #print (optim_out)
           optim_out[c("B", "Omega", "monitoring")]
-        }, future.seed = TRUE)
+        })

         B_boots <- boots %>% map("B") %>% reduce(`+`) / n_resamples
         attr(private$B, "variance_bootstrap") <-
           boots %>% map("B") %>% map(~( (. - B_boots)^2)) %>% reduce(`+`) %>%
           `dimnames<-`(dimnames(private$B)) / n_resamples

+        vcov_boots = private$compute_vcov_from_resamples(boots)
+        attr(private$B, "vcov_bootstrap") <- vcov_boots

         Omega_boots <- boots %>% map("Omega") %>% reduce(`+`) / n_resamples
         attr(private$Omega, "variance_bootstrap") <-
           boots %>% map("Omega") %>% map(~( (. - Omega_boots)^2)) %>% reduce(`+`) %>%
```

Review comment (on the first algorithm check): Should we add a […]

Reply: That would be better. Would also be good to be able to specify the torch device (e.g. "mps", "cuda", etc).

Review comment (on the second algorithm check): Same as previous comment.
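A sketch of the kind of explicit flag this exchange points toward; config$backend is hypothetical and does not exist in this diff:

```r
# Hypothetical: tag the backend explicitly instead of inferring it from the
# optimizer name, and let callers choose the torch device.
uses_torch <- identical(config$backend, "torch")
if (uses_torch)
  data <- lapply(data, torch_tensor, device = config$device)
```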
```diff
@@ -386,7 +463,7 @@
     #' * variational_var boolean indicating whether variational Fisher information matrix should be computed to estimate the variance of the model parameters (highly underestimated). Default is FALSE.
     #' * rsquared boolean indicating whether approximation of R2 based on deviance should be computed. Default is TRUE
     #' * trace integer for verbosity. should be > 1 to see output in post-treatments
-    postTreatment = function(responses, covariates, offsets, weights = rep(1, nrow(responses)), config, nullModel = NULL) {
+    postTreatment = function(responses, covariates, offsets, weights = rep(1, nrow(responses)), config_post, config_optim, nullModel = NULL) {
       ## PARAMATERS DIMNAMES
       ## Set names according to those of the data matrices. If missing, use sensible defaults
       if (is.null(colnames(responses)))
@@ -403,24 +480,27 @@
       ## OPTIONAL POST-TREATMENT (potentially costly)
       ## 1. compute and store approximated R2 with Poisson-based deviance
-      if (config$rsquared) {
-        if(config$trace > 1) cat("\n\tComputing bootstrap estimator of the variance...")
+      if (config_post$rsquared) {
+        if(config_post$trace > 1) cat("\n\tComputing approximate R^2...")
         private$approx_r2(responses, covariates, offsets, weights, nullModel)
       }
       ## 2. compute and store matrix of standard variances for B and Omega with rough variational approximation
-      if (config$variational_var) {
-        if(config$trace > 1) cat("\n\tComputing variational estimator of the variance...")
-        private$variance_variational(covariates)
+      if (config_post$variational_var) {
+        if(config_post$trace > 1) cat("\n\tComputing variational estimator of the variance...")
+        private$variance_variational(covariates, config = config_optim)
       }
       ## 3. Jackknife estimation of bias and variance
-      if (config$jackknife) {
-        if(config$trace > 1) cat("\n\tComputing jackknife estimator of the variance...")
-        private$variance_jackknife(responses, covariates, offsets, weights)
+      if (config_post$jackknife) {
+        if(config_post$trace > 1) cat("\n\tComputing jackknife estimator of the variance...")
+        private$variance_jackknife(responses, covariates, offsets, weights, config = config_optim)
       }
       ## 4. Bootstrap estimation of variance
-      if (config$bootstrap > 0) {
-        if(config$trace > 1) cat("\n\tComputing bootstrap estimator of the variance...")
-        private$variance_bootstrap(responses, covariates, offsets, weights, config$bootstrap)
+      if (config_post$bootstrap > 0) {
+        if(config_post$trace > 1) {
+          cat("\n\tComputing bootstrap estimator of the variance...")
+          #print (str(config_optim))
+        }
+        private$variance_bootstrap(responses, covariates, offsets, weights, n_resamples=config_post$bootstrap, config = config_optim)
       }
     },
```
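Pulling the documented switches together, a hypothetical config_post could look like this (field names come from the code and doc comments above; defaults are shown only where the comments state them):

```r
# Hypothetical post-treatment configuration; illustrative values.
config_post <- list(
  rsquared        = TRUE,   # approximate R2 via Poisson deviance (documented default: TRUE)
  variational_var = FALSE,  # variational Fisher variance, highly underestimated (default: FALSE)
  jackknife       = FALSE,  # leave-one-out bias/variance estimates
  bootstrap       = 0L,     # number of bootstrap resamples; 0 disables (documented default: 0)
  sandwich_var    = FALSE,  # sandwich variance (fixed-covariance fits only)
  trace           = 2       # > 1 prints progress of each post-treatment
)
```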
```diff
@@ -815,11 +895,11 @@ PLNfit_fixedcov <- R6Class(
     #' * bootstrap integer indicating the number of bootstrap resamples generated to evaluate the variance of the model parameters. Default is 0 (inactivated).
     #' * variational_var boolean indicating whether variational Fisher information matrix should be computed to estimate the variance of the model parameters (highly underestimated). Default is FALSE.
     #' * rsquared boolean indicating whether approximation of R2 based on deviance should be computed. Default is TRUE
-    postTreatment = function(responses, covariates, offsets, weights = rep(1, nrow(responses)), config, nullModel = NULL) {
-      super$postTreatment(responses, covariates, offsets, weights, config, nullModel)
+    postTreatment = function(responses, covariates, offsets, weights = rep(1, nrow(responses)), config_post, config_optim, nullModel = NULL) {
+      super$postTreatment(responses, covariates, offsets, weights, config_post, config_optim, nullModel)
       ## 6. compute and store matrix of standard variances for B with sandwich correction approximation
-      if (config$sandwich_var) {
-        if(config$trace > 1) cat("\n\tComputing sandwich estimator of the variance...")
+      if (config_post$sandwich_var) {
+        if(config_post$trace > 1) cat("\n\tComputing sandwich estimator of the variance...")
         private$vcov_sandwich_B(responses, covariates)
       }
     }
```
Review comment: Why the additional comma if it's the last element of the list?

Reply: oops :)