diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c59e57b6..b08927db 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,13 +3,15 @@ # R specific hooks: https://github.com/lorenzwalthert/precommit repos: - repo: https://github.com/lorenzwalthert/precommit - rev: v0.3.2.9003 + rev: v0.3.2.9019 hooks: - id: style-files args: [] - id: roxygenize additional_dependencies: - knitr + - stan-dev/cmdstanr@*release + - mvtnorm # codemeta must be above use-tidy-description when both are used # - id: codemeta-description-updated - id: use-tidy-description @@ -49,11 +51,11 @@ repos: - id: no-browser-statement - id: deps-in-desc - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.3 + rev: v3.0.1 hooks: - id: prettier - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-added-large-files args: ["--maxkb=200"] @@ -79,7 +81,7 @@ repos: files: '\.Rhistory|\.RData|\.Rds|\.rds$' # `exclude: ` to allow committing specific files. - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.32.2 + rev: v0.34.0 hooks: - id: markdownlint args: diff --git a/DESCRIPTION b/DESCRIPTION index 4dfd5a24..86903442 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,10 @@ Imports: glue, methods, graphics, - posterior + posterior, + generics, + Matrix, + mvtnorm Suggests: survival, flexsurv, @@ -121,6 +124,7 @@ Collate: 'sim_is_null_effect_covered.R' 'sim_is_true_effect_covered.R' 'sim_samplesize.R' + 'simulate_data.R' 'simvar_class.R' 'trim_data_matrix.R' 'uniform_prior.R' diff --git a/NAMESPACE b/NAMESPACE index 062d3593..8351686a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,22 +1,28 @@ # Generated by roxygen2: do not edit by hand export(add_covariates) +export(baseline_covariates) export(bernoulli_prior) export(beta_prior) export(bin_var) +export(binary_cutoff) export(borrowing_details) export(cauchy_prior) export(check_cmdstanr) export(check_data_matrix_has_columns) export(cont_var) +export(covariance_matrix) export(create_analysis_obj) +export(create_baseline_object) export(create_data_matrix) export(create_simulation_obj) export(eval_constraints) export(exp_surv_dist) export(exponential_prior) export(gamma_prior) +export(generate) export(get_cmd_stan_models) +export(get_quantiles) export(get_results) export(get_vars) export(half_cauchy_prior) @@ -41,9 +47,12 @@ export(treatment_details) export(uniform_prior) export(variable_dictionary) export(weib_ph_surv_dist) +exportMethods(generate) exportMethods(mcmc_sample) import(checkmate) import(cmdstanr) +importFrom(Matrix,nearPD) +importFrom(generics,generate) importFrom(graphics,axis) importFrom(graphics,lines) importFrom(graphics,par) @@ -53,7 +62,9 @@ importFrom(methods,callNextMethod) importFrom(methods,is) importFrom(methods,new) importFrom(methods,show) +importFrom(mvtnorm,rmvnorm) importFrom(stats,complete.cases) importFrom(stats,formula) importFrom(stats,model.matrix) +importFrom(stats,pnorm) importFrom(stats,setNames) diff --git a/R/cmdstan.R b/R/cmdstan.R index feb46d72..5a94df06 100644 --- a/R/cmdstan.R +++ b/R/cmdstan.R @@ -1,18 +1,21 @@ #' Check Stan #' #' @description Check whether `cmdstanr` is available and prints version and logistic example. +#' @param check_sampling Compile and sample from the "logistic" example model. #' #' @export #' #' @examples #' check_cmdstanr() -check_cmdstanr <- function() { +check_cmdstanr <- function(check_sampling = FALSE) { cat("cmdstan version: \n") print(cmdstan_version()) cat("Example program:\n") print_example_program(example = c("logistic")) - cat("Example program results:\n") - cmdstanr_example("logistic", quiet = FALSE, chains = 1) + if (check_sampling) { + cat("Example program results:\n") + cmdstanr_example("logistic", quiet = FALSE, chains = 1) + } } diff --git a/R/generics.R b/R/generics.R index b57d0ad0..488b2d4c 100644 --- a/R/generics.R +++ b/R/generics.R @@ -1,4 +1,3 @@ - #' Plot Prior Objects #' #' Plot prior distributions as densities. Continuous distributions are plotted as curves and @@ -85,3 +84,13 @@ setGeneric("get_results", function(object) standardGeneric("get_results")) #' @export #' setGeneric("get_cmd_stan_models", function(object) standardGeneric("get_cmd_stan_models")) + +#' Generate Data from Object +#' +#' @param x object +#' @param ... Other arguments passed to methods +#' +#' @rdname generate +#' +#' @export +setGeneric("generate", function(x, ...) standardGeneric("generate")) diff --git a/R/simulate_data.R b/R/simulate_data.R new file mode 100644 index 00000000..d72aadf6 --- /dev/null +++ b/R/simulate_data.R @@ -0,0 +1,359 @@ +#' Create Covariance Matrix +#' +#' @param diag Diagonal entries of the covariance matrix +#' @param upper_tri Upper triangle entries of the matrix, specified column wise. +#' +#' @return A symmetric matrix with `diag` values on the main diagonal and +#' `upper_tri` values in the lower and upper triangles. +#' @export +#' +#' @examples +#' m1 <- covariance_matrix(c(1, 1, 1, 1), c(.8, .3, .8, 0, 0, 0)) +#' m1 +#' mvtnorm::rmvnorm(5, mean = c(0, 0, 0, 0), sigma = m1) +#' +#' # No correlation +#' covariance_matrix(c(1, 2, 3)) +#' +#' @importFrom Matrix nearPD +covariance_matrix <- function(diag, upper_tri) { + assert_numeric(diag, lower = 0) + dim <- length(diag) + if (missing(upper_tri)) upper_tri <- rep(0, sum(seq.int(1, dim - 1))) + + if (dim == 1) { + cov_mat <- matrix(diag) + } else if (dim > 1) { + assert_numeric(upper_tri, len = sum(seq.int(1, dim - 1))) + cov_mat <- diag(diag, nrow = dim, ncol = dim) + cov_mat[upper.tri(cov_mat)] <- upper_tri + cov_mat[lower.tri(cov_mat)] <- t(cov_mat)[lower.tri(cov_mat)] + } + if (any(eigen(cov_mat, only.values = TRUE)$values < 0)) { + warning( + "Provided parameters do not define a semi-positive definite matrix. ", + "Finding nearest positive definite matrix with Matrix::nearPD()." + ) + cov_mat <- Matrix::nearPD(cov_mat, ensureSymmetry = TRUE, keepDiag = TRUE, base.matrix = TRUE)$mat + } + cov_mat +} + + +#' Specify Correlated Baseline Covariates +#' +#' Set parameters to generate correlated multivariate normal data for internal and external patients. +#' +#' @param names character vector of variable names. +#' @param means_int numeric vector of means for internal patients. Must have same length as `names` +#' @param means_ext numeric vector of means for external patients. Must have same length as `names` +#' @param covariance_int variance-covariance matrix for generating multivariate normal for internal patients. +#' Must be square matrix with same number of rows and `length(names)` +#' @param covariance_ext variance-covariance matrix for generating multivariate normal data for external patients. +#' Must be square matrix with same number of rows and `length(names)` +#' +#' @return [BaselineObject][BaselineObject-class] to build simulated dataset +#' @export +#' +#' @examples +#' corr_covs <- baseline_covariates( +#' names = c("b1", "b2"), +#' means_int = c(5, 25), +#' covariance_int = covariance_matrix(diag = c(1, 1), upper_tri = 0.4) +#' ) +baseline_covariates <- function(names, + means_int, + means_ext = means_int, + covariance_int, + covariance_ext = covariance_int) { + assert_character(names) + n <- length(names) + assert_numeric(means_int, finite = TRUE, len = n, any.missing = FALSE) + assert_numeric(means_ext, finite = TRUE, len = n, any.missing = FALSE) + assert_matrix(covariance_int, nrows = n, ncols = n, any.missing = FALSE) + assert_matrix(covariance_ext, nrows = n, ncols = n, any.missing = FALSE) + + .correlated_covariates( + names = names, + means_int = means_int, + means_ext = means_ext, + covariance_int = covariance_int, + covariance_ext = covariance_ext + ) +} + + +.correlated_covariates <- setClass( + "CorrelatedCovariates", + slots = c( + names = "character", + means_int = "numeric", + means_ext = "numeric", + covariance_int = "matrix", + covariance_ext = "matrix" + ) +) + + +# BaselineObject ----- + +#' `BaselineObject` class for data simulation +#' +#' @slot n_trt_int integer. Number of internal treated patients +#' @slot n_ctrl_int integer. Number of internal control patients +#' @slot n_ctrl_ext integer. Number of external control patients +#' @slot covariates list. List of correlated covariates objects, see [baseline_covariates()] +#' @slot transformations list. List of named transformation functions. +#' +.baseline_object <- setClass( + "BaselineObject", + slots = c( + n_trt_int = "integer", + n_ctrl_int = "integer", + n_ctrl_ext = "integer", + covariates = "list", + transformations = "list" + ) +) + +#' Create Baseline Data Simulation Object +#' +#' @param n_trt_int Number of internal treated patients +#' @param n_ctrl_int Number of internal control patients +#' @param n_ctrl_ext Number of external control patients +#' @param covariates List of correlated covariates objects, see [baseline_covariates()] +#' @param transformations List of named transformation functions. +#' +#' @details +#' Transformation functions are evaluated in order and create or overwrite a column +#' in the data.frame with that name. The function should take a `data.frame` (specifically +#' a `BaselineDataFrame` object from `generate(BaselineObject)`) and return a vector with +#' length identical to the total number of patients. +#' The `@BaselineObject` slot may be accessed directly or with [get_quantiles()] to +#' create transformations. See [binary_cutoff()] +#' +#' @return A [BaselineObject][BaselineObject-class] +#' @export +#' +#' @examples +#' bl_no_covs <- create_baseline_object( +#' n_trt_int = 100, +#' n_ctrl_int = 50, +#' n_ctrl_ext = 100 +#' ) +#' +#' +#' bl_biomarkers <- create_baseline_object( +#' n_trt_int = 100, +#' n_ctrl_int = 50, +#' n_ctrl_ext = 100, +#' covariates = baseline_covariates( +#' c("b1", "b2", "b3"), +#' means_int = c(0, 0, 0), +#' covariance_int = covariance_matrix(c(1, 1, 1), c(.8, .3, .8)) +#' ), +#' transformations = list( +#' exp_b1 = function(data) exp(data$b1), +#' b2 = binary_cutoff("b2", int_cutoff = 0.7, ext_cutoff = 0.5) +#' ) +#' ) +#' +create_baseline_object <- function(n_trt_int, n_ctrl_int, n_ctrl_ext, covariates, transformations) { + assert_integerish(n_trt_int, len = 1, lower = 1) + assert_integerish(n_ctrl_int, len = 1, lower = 1) + assert_integerish(n_ctrl_ext, len = 1, lower = 1) + + if (!missing(covariates)) { + if (is(covariates, "CorrelatedCovariates")) covariates <- list(covariates) + assert_list(covariates) + } else { + covariates <- list(.correlated_covariates()) + } + + if (!missing(transformations)) { + assert_list(transformations) + } else { + transformations <- list() + } + + .baseline_object( + n_trt_int = as.integer(n_trt_int), + n_ctrl_int = as.integer(n_ctrl_int), + n_ctrl_ext = as.integer(n_ctrl_ext), + covariates = covariates, + transformations = transformations + ) +} + +#' @importFrom generics generate +#' @importFrom mvtnorm rmvnorm +# nolint start +generate.BaselineObject <- function(x, ...) { + # nolint end + arm_data <- data.frame( + patid = seq_len(x@n_trt_int + x@n_ctrl_int + x@n_ctrl_ext), + ext = rep(c(0, 1), times = c(x@n_trt_int + x@n_ctrl_int, x@n_ctrl_ext)), + trt = rep(c(1, 0), times = c(x@n_trt_int, x@n_ctrl_int + x@n_ctrl_ext)) + ) + + # If any covariates are defined, generate multivariate normal data and combine with arm data + cov_defined <- vapply(x@covariates, function(x) length(x@names) > 0, logical(1L)) + if (any(cov_defined)) { + cor_data_list <- lapply( + x@covariates[cov_defined], + function(cor_cov, n_int = x@n_trt_int + x@n_ctrl_int, n_ext = x@n_ctrl_ext) { + mvnorm_data <- rbind( + mvtnorm::rmvnorm(n = n_int, mean = cor_cov@means_int, sigma = cor_cov@covariance_int), + mvtnorm::rmvnorm(n = n_ext, mean = cor_cov@means_ext, sigma = cor_cov@covariance_ext) + ) + colnames(mvnorm_data) <- cor_cov@names + as.data.frame(mvnorm_data) + } + ) + arm_data <- cbind(arm_data, do.call("cbind", cor_data_list)) + } + + bl_df <- .baseline_dataframe(arm_data, BaselineObject = x) + + # For each named transformation, either create a new column or overwrite + for (i in names(x@transformations)) { + var_index <- match(i, bl_df@names) + if (!is.na(var_index)) { + bl_df@.Data[[var_index]] <- x@transformations[[i]](bl_df) + } else { + next_index <- length(bl_df@names) + 1 + bl_df@.Data[[next_index]] <- x@transformations[[i]](bl_df) + bl_df@names[next_index] <- i + } + } + bl_df +} + + +#' Generate Data for a `BaselineObject` +#' +#' @param x a `BaselineObject` object created by [create_baseline_object] +#' @param ... additional parameters are ignored +#' +#' @return A [BaselineDataFrame][psborrow2::BaselineDataFrame-class] object +#' @export +#' +#' @examples +#' bl_biomarkers <- create_baseline_object( +#' n_trt_int = 100, +#' n_ctrl_int = 50, +#' n_ctrl_ext = 100, +#' covariates = baseline_covariates( +#' c("b1", "b2", "b3"), +#' means_int = c(0, 0, 0), +#' covariance_int = covariance_matrix(c(1, 1, 1), c(.8, .3, .8)) +#' ), +#' transformations = list( +#' exp_b1 = function(data) exp(data$b1), +#' b2 = binary_cutoff("b2", int_cutoff = 0.7, ext_cutoff = 0.5) +#' ) +#' ) +#' generate(bl_biomarkers) +setMethod( + f = "generate", + signature = "BaselineObject", + definition = generate.BaselineObject +) + + +#' Baseline Data Frame Object +#' +#' A `data.frame` with additional slot containing simulation definition +#' +#' @slot BaselineObject Simulated covariates definitions as `BaselineObject`. See [create_baseline_object()] +#' +#' @return A `BaselineDataFrame` +.baseline_dataframe <- setClass( + "BaselineDataFrame", + contains = "data.frame", + slots = c( + BaselineObject = "BaselineObject" + ) +) + +# Validity checks for Baseline Object +setValidity("BaselineObject", function(object) { + msg <- NULL + c(msg, check_list(object@covariates, types = "CorrelatedCovariates")) + c(msg, check_list(object@transformations, types = "function", names = "named")) + if (is.null(msg)) TRUE else msg +}) + +# show method +setMethod( + f = "show", + signature = "BaselineDataFrame", + definition = function(object) { + cat("Simulated Baseline Data") + cat("\n") + print.data.frame(object) + } +) + +#' Get Quantiles of Random Data +#' +#' Helper for use within transformation functions for [create_baseline_object()]. +#' +#' @param object a `BaselineDataFrame` +#' @param var character string name of the variable +#' +#' @return A numeric vector containing quantiles based on the data +#' generating distribution. +#' @export +#' @importFrom stats pnorm +#' +get_quantiles <- function(object, var) { + assert_class(object, "BaselineDataFrame") + + covs <- object@BaselineObject@covariates + names <- unlist(lapply(covs, function(x) x@names)) + assert_subset(var, choices = names) + + index <- which(names == var) + + mean_int <- unlist(lapply(covs, function(x) x@means_int))[index] + mean_ext <- unlist(lapply(covs, function(x) x@means_ext))[index] + + sd_int <- unlist(lapply(covs, function(x) sqrt(diag(x@covariance_int))))[index] + sd_ext <- unlist(lapply(covs, function(x) sqrt(diag(x@covariance_ext))))[index] + + ifelse( + object[["ext"]] == 0, + pnorm(object[[var]], mean_int, sd_int), + pnorm(object[[var]], mean_ext, sd_ext) + ) +} + + +#' Binary Cut-Off Transformation +#' +#' @param name variable to transform +#' @param int_cutoff cut-off for internal patients, numeric between 0 and 1 +#' @param ext_cutoff cut-off for external patients, numeric between 0 and 1 +#' +#' @return Transformation function to be used in [create_baseline_object()]. +#' Sets quantile values larger than cut-off value to `TRUE` otherwise `FALSE`. +#' @export +#' @examples +#' # Creates a simple function, where `data` is a `BaselineDataFrame`: +#' function(data) { +#' ext <- data$ext == 0 +#' q <- get_quantiles(data, name) +#' ifelse(ext, q > int_cutoff, q > ext_cutoff) +#' } +#' +binary_cutoff <- function(name, int_cutoff, ext_cutoff) { + assert_character(name, len = 1, any.missing = FALSE) + assert_numeric(int_cutoff, lower = 0, upper = 1, any.missing = FALSE, len = 1) + assert_numeric(ext_cutoff, lower = 0, upper = 1, any.missing = FALSE, len = 1) + function(data) { + ext <- data$ext == 0 + q <- get_quantiles(data, name) + as.integer(ifelse(ext, q > int_cutoff, q > ext_cutoff)) + } +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 47c7b920..92ca693d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -30,3 +30,4 @@ articles: - "`propensity_scores`" - "`overview_slides`" - "`weighting`" + - "`data_simulation`" diff --git a/design/Simulate_data_baseline.Rmd b/design/Simulate_data_baseline.Rmd new file mode 100644 index 00000000..7e1417ea --- /dev/null +++ b/design/Simulate_data_baseline.Rmd @@ -0,0 +1,107 @@ +--- +title: "Simulate Data: Baseline" +author: "Isaac Gravestock" +date: "2023-03-31" +output: html_document +--- + +# 1. Baseline Design + +To prepare for simulating survival data, we need to first create the baseline data for the cohort. + +Requirements: + +1. number of patients in the 3 arms +2. arbitrary number of correlated variables +3. transform to binary variables by cut-off value +4. set names +5. set means for internal and external arms + +```{r, eval = FALSE} +# generate correlated variables +correlated_covs <- add_covariates( + names = c("x1", "x2", "x3"), + means_int = c(0, 0, 0), + means_ext = means, + covariance = covariance_matrix(diag = c(1, 1, 1), upper_tri = c(0, .2, .3)) +) + +# generate uncorrelated variables +indep_covs <- add_covariates( + names = c("age", "sex"), + means = c(60, 0.5), + means_ext = means, + covariance = covariance_matrix(diag = c(1, 1), upper_tri = c(0, 0)) +) + +baseline_obj <- sim_baseline( + sample_size = c(trt_int = 100, ctrl_int = 50, ctrl_ext = 200), + covariates = list(correlated_covs, indep_covs) +) %>% transform( + x1 = binary_tranform(x1, cut_off = 0.4, cut_off_ext = 0.45), + age2 = \(data) data$age^2 +) + + + +baseline_obj <- sim_baseline( + sample_size = c(trt_int = 100, ctrl_int = 50, ctrl_ext = 200), + covariates = list(correlated_covs, indep_covs), + transformations = list( + x1 = binary_tranform(cut_off = 0.4, cut_off_ext = 0.45), + age2 = \(data) data$age^2 + ) +) +``` + +`sim_baseline` is enough to specify a cohort to generate survival times. +Implementation of the above functions can be a first phase, generating survival data will be in another +design doc + +# Functions descriptions + +```{r} +# Specify covariates to include in baseline +# +# names: covariates names +# means_int: means for internal trial patients +# means_ext: means for external patients +# covariance_int: covariance matrix for multivariate normal for internal +# covariance_ext: covariance matrix for multivariate normal for external +# +# Returns object capturing these parameters for later processing +add_covariates(names, means_int, means_ext, covariance_int, covariance_ext) + +# Transform variables +# +# ... Arguments corresponding to the names in `add_covariates` taking functions +# which takes a list of vectors of percentiles (int, ext), transforms them, and returns +# a list of vectors of the same dimension. +# +# Returns a modified covariates object +transform(...) + +binary_tranform <- function(cut_off_int, cut_off_ext) { + function(qlist) { + list(int = qlist$int > cut_off_int, ext = qlist$ext > cut_off_ext) + } +} + + +# Covariance matrix +# +# diag: variances to put in the main diagonal +# upper_tri: covariance terms to put in the upper and lower triangles. Given by column +covariance_matrix(diag, upper_tri) + +# Simulate Baseline +# +# sample_size: named vector of sample sizes (maybe these should be directly parameters) +# covariates: a list of covariates objects +# +# Constructs the multivariate normal distribution by combining the means and covariance matrices blockwise. +# Generates a data frame using mvrnorm(). +# Any variables to be transformed and calculated back to percentiles using marginal distribution and then +# transformed. +sim_baseline(sample_size, covariates) +``` diff --git a/design/Simulate_data_survival.Rmd b/design/Simulate_data_survival.Rmd new file mode 100644 index 00000000..e55920b9 --- /dev/null +++ b/design/Simulate_data_survival.Rmd @@ -0,0 +1,73 @@ +--- +title: 'Simulate Data: Survival' +author: "Isaac Gravestock" +date: "2023-06-20" +output: html_document +--- + +Continuing from the design of baseline covariates, we consider how to implement survival data simulation. + +# 2. Survival Design + +For survival we need to specify in addition: + + - baseline hazard parameters + - HR: + - internal/external HR "drift" + - covariate HR coefficients + - treatment effect HR + +```{r} +# each of these builds up the object +# nothing needs to generate any data +# +# implement generate() method for all of them +# generate methods are recursive, +# highest level would take n=N repeat from highest level +# + + +baseline_obj %>% + design_matrx(~ sex + trt + ext) %>% + linear_predictor(betas = c(sex = 0.3, trt = 1, ext = 1.1)) %>% + simulate_survival(hazard_model = weibull_ph(shape = 1, scale = 0.9)) %>% + survival_matrix() %>% # think of better name.... + generate(n = 100) + +generate(baseline_obj()) %>% + mutate(age2 = age^2) %>% + design_matrix(~ sex + trt + ext) + +baseline_design <- design_matrix(baseline_obj, ~ sex + I(age^2) + trt + ext, preprocess_fun = function(data) data) +baseline_design2 <- design_matrix(baseline_obj, ~., preprocess_fun = function(data) data$age2 <- data$age^2) +colnames(baseline_design) + + +baseline_predictor <- linear_predictor( + design = baseline_design, # matrix + betas = c( + sex = .3, + "I(age^2)" = .4, + trt = 1, + ext = 1.1 + ) +) + +simulate_survival( + baseline_predictor, + hazard_model = weibull_ph(shape = 0.76, lambda = 1) # is a function that takes vector of LPs and gives vector of times +) + +survival_matrix( + baseline_design, + surv_times = simulate_survival(baseline_predictor, hazard_model = weibull_ph(shape = 0.76, lambda = 1)), + censor_times = censor(), + recruitment = recruitment() +) +``` + +Refer to psborrow for how these can be calculated and combined. + +- recruitment rate +- cut off +- censoring diff --git a/man/BaselineDataFrame-class.Rd b/man/BaselineDataFrame-class.Rd new file mode 100644 index 00000000..f4b57056 --- /dev/null +++ b/man/BaselineDataFrame-class.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\docType{class} +\name{BaselineDataFrame-class} +\alias{BaselineDataFrame-class} +\alias{.baseline_dataframe} +\title{Baseline Data Frame Object} +\value{ +A \code{BaselineDataFrame} +} +\description{ +A \code{data.frame} with additional slot containing simulation definition +} +\section{Slots}{ + +\describe{ +\item{\code{BaselineObject}}{Simulated covariates definitions as \code{BaselineObject}. See \code{\link[=create_baseline_object]{create_baseline_object()}}} +}} + diff --git a/man/BaselineObject-class.Rd b/man/BaselineObject-class.Rd new file mode 100644 index 00000000..4c4ede19 --- /dev/null +++ b/man/BaselineObject-class.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\docType{class} +\name{BaselineObject-class} +\alias{BaselineObject-class} +\alias{.baseline_object} +\title{\code{BaselineObject} class for data simulation} +\description{ +\code{BaselineObject} class for data simulation +} +\section{Slots}{ + +\describe{ +\item{\code{n_trt_int}}{integer. Number of internal treated patients} + +\item{\code{n_ctrl_int}}{integer. Number of internal control patients} + +\item{\code{n_ctrl_ext}}{integer. Number of external control patients} + +\item{\code{covariates}}{list. List of correlated covariates objects, see \code{\link[=baseline_covariates]{baseline_covariates()}}} + +\item{\code{transformations}}{list. List of named transformation functions.} +}} + diff --git a/man/baseline_covariates.Rd b/man/baseline_covariates.Rd new file mode 100644 index 00000000..db83bd21 --- /dev/null +++ b/man/baseline_covariates.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{baseline_covariates} +\alias{baseline_covariates} +\title{Specify Correlated Baseline Covariates} +\usage{ +baseline_covariates( + names, + means_int, + means_ext = means_int, + covariance_int, + covariance_ext = covariance_int +) +} +\arguments{ +\item{names}{character vector of variable names.} + +\item{means_int}{numeric vector of means for internal patients. Must have same length as \code{names}} + +\item{means_ext}{numeric vector of means for external patients. Must have same length as \code{names}} + +\item{covariance_int}{variance-covariance matrix for generating multivariate normal for internal patients. +Must be square matrix with same number of rows and \code{length(names)}} + +\item{covariance_ext}{variance-covariance matrix for generating multivariate normal data for external patients. +Must be square matrix with same number of rows and \code{length(names)}} +} +\value{ +\link[=BaselineObject-class]{BaselineObject} to build simulated dataset +} +\description{ +Set parameters to generate correlated multivariate normal data for internal and external patients. +} +\examples{ +corr_covs <- baseline_covariates( + names = c("b1", "b2"), + means_int = c(5, 25), + covariance_int = covariance_matrix(diag = c(1, 1), upper_tri = 0.4) +) +} diff --git a/man/binary_cutoff.Rd b/man/binary_cutoff.Rd new file mode 100644 index 00000000..52af170d --- /dev/null +++ b/man/binary_cutoff.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{binary_cutoff} +\alias{binary_cutoff} +\title{Binary Cut-Off Transformation} +\usage{ +binary_cutoff(name, int_cutoff, ext_cutoff) +} +\arguments{ +\item{name}{variable to transform} + +\item{int_cutoff}{cut-off for internal patients, numeric between 0 and 1} + +\item{ext_cutoff}{cut-off for external patients, numeric between 0 and 1} +} +\value{ +Transformation function to be used in \code{\link[=create_baseline_object]{create_baseline_object()}}. +Sets quantile values larger than cut-off value to \code{TRUE} otherwise \code{FALSE}. +} +\description{ +Binary Cut-Off Transformation +} +\examples{ +# Creates a simple function, where `data` is a `BaselineDataFrame`: +function(data) { + ext <- data$ext == 0 + q <- get_quantiles(data, name) + ifelse(ext, q > int_cutoff, q > ext_cutoff) +} + +} diff --git a/man/check_cmdstanr.Rd b/man/check_cmdstanr.Rd index 20088067..9c5dc749 100644 --- a/man/check_cmdstanr.Rd +++ b/man/check_cmdstanr.Rd @@ -4,7 +4,10 @@ \alias{check_cmdstanr} \title{Check Stan} \usage{ -check_cmdstanr() +check_cmdstanr(check_sampling = FALSE) +} +\arguments{ +\item{check_sampling}{Compile and sample from the "logistic" example model.} } \description{ Check whether \code{cmdstanr} is available and prints version and logistic example. diff --git a/man/covariance_matrix.Rd b/man/covariance_matrix.Rd new file mode 100644 index 00000000..edd9d4d6 --- /dev/null +++ b/man/covariance_matrix.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{covariance_matrix} +\alias{covariance_matrix} +\title{Create Covariance Matrix} +\usage{ +covariance_matrix(diag, upper_tri) +} +\arguments{ +\item{diag}{Diagonal entries of the covariance matrix} + +\item{upper_tri}{Upper triangle entries of the matrix, specified column wise.} +} +\value{ +A symmetric matrix with \code{diag} values on the main diagonal and +\code{upper_tri} values in the lower and upper triangles. +} +\description{ +Create Covariance Matrix +} +\examples{ +m1 <- covariance_matrix(c(1, 1, 1, 1), c(.8, .3, .8, 0, 0, 0)) +m1 +mvtnorm::rmvnorm(5, mean = c(0, 0, 0, 0), sigma = m1) + +# No correlation +covariance_matrix(c(1, 2, 3)) + +} diff --git a/man/create_baseline_object.Rd b/man/create_baseline_object.Rd new file mode 100644 index 00000000..f1d349cf --- /dev/null +++ b/man/create_baseline_object.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{create_baseline_object} +\alias{create_baseline_object} +\title{Create Baseline Data Simulation Object} +\usage{ +create_baseline_object( + n_trt_int, + n_ctrl_int, + n_ctrl_ext, + covariates, + transformations +) +} +\arguments{ +\item{n_trt_int}{Number of internal treated patients} + +\item{n_ctrl_int}{Number of internal control patients} + +\item{n_ctrl_ext}{Number of external control patients} + +\item{covariates}{List of correlated covariates objects, see \code{\link[=baseline_covariates]{baseline_covariates()}}} + +\item{transformations}{List of named transformation functions.} +} +\value{ +A \link[=BaselineObject-class]{BaselineObject} +} +\description{ +Create Baseline Data Simulation Object +} +\details{ +Transformation functions are evaluated in order and create or overwrite a column +in the data.frame with that name. The function should take a \code{data.frame} (specifically +a \code{BaselineDataFrame} object from \code{generate(BaselineObject)}) and return a vector with +length identical to the total number of patients. +The \verb{@BaselineObject} slot may be accessed directly or with \code{\link[=get_quantiles]{get_quantiles()}} to +create transformations. See \code{\link[=binary_cutoff]{binary_cutoff()}} +} +\examples{ +bl_no_covs <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100 +) + + +bl_biomarkers <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100, + covariates = baseline_covariates( + c("b1", "b2", "b3"), + means_int = c(0, 0, 0), + covariance_int = covariance_matrix(c(1, 1, 1), c(.8, .3, .8)) + ), + transformations = list( + exp_b1 = function(data) exp(data$b1), + b2 = binary_cutoff("b2", int_cutoff = 0.7, ext_cutoff = 0.5) + ) +) + +} diff --git a/man/generate-BaselineObject-method.Rd b/man/generate-BaselineObject-method.Rd new file mode 100644 index 00000000..42f37977 --- /dev/null +++ b/man/generate-BaselineObject-method.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{generate,BaselineObject-method} +\alias{generate,BaselineObject-method} +\title{Generate Data for a \code{BaselineObject}} +\usage{ +\S4method{generate}{BaselineObject}(x, ...) +} +\arguments{ +\item{x}{a \code{BaselineObject} object created by \link{create_baseline_object}} + +\item{...}{additional parameters are ignored} +} +\value{ +A \link[=BaselineDataFrame-class]{BaselineDataFrame} object +} +\description{ +Generate Data for a \code{BaselineObject} +} +\examples{ +bl_biomarkers <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100, + covariates = baseline_covariates( + c("b1", "b2", "b3"), + means_int = c(0, 0, 0), + covariance_int = covariance_matrix(c(1, 1, 1), c(.8, .3, .8)) + ), + transformations = list( + exp_b1 = function(data) exp(data$b1), + b2 = binary_cutoff("b2", int_cutoff = 0.7, ext_cutoff = 0.5) + ) +) +generate(bl_biomarkers) +} diff --git a/man/generate.Rd b/man/generate.Rd new file mode 100644 index 00000000..213766ba --- /dev/null +++ b/man/generate.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R +\name{generate} +\alias{generate} +\title{Generate Data from Object} +\usage{ +generate(x, ...) +} +\arguments{ +\item{x}{object} + +\item{...}{Other arguments passed to methods} +} +\description{ +Generate Data from Object +} diff --git a/man/get_quantiles.Rd b/man/get_quantiles.Rd new file mode 100644 index 00000000..c3424aa2 --- /dev/null +++ b/man/get_quantiles.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_data.R +\name{get_quantiles} +\alias{get_quantiles} +\title{Get Quantiles of Random Data} +\usage{ +get_quantiles(object, var) +} +\arguments{ +\item{object}{a \code{BaselineDataFrame}} + +\item{var}{character string name of the variable} +} +\value{ +A numeric vector containing quantiles based on the data +generating distribution. +} +\description{ +Helper for use within transformation functions for \code{\link[=create_baseline_object]{create_baseline_object()}}. +} diff --git a/tests/testthat/test-cmdstan.R b/tests/testthat/test-cmdstan.R index 19dd31be..1d7c1cb5 100644 --- a/tests/testthat/test-cmdstan.R +++ b/tests/testthat/test-cmdstan.R @@ -1,8 +1,12 @@ test_that("check_cmdstanr runs as expected", { expect_output( - result <- check_cmdstanr(), + check_cmdstanr(check_sampling = FALSE), "int N;", fixed = TRUE ) + + skip_on_ci() + skip_on_cran() + result <- check_cmdstanr(check_sampling = TRUE) expect_class(result, "CmdStanMCMC") }) diff --git a/tests/testthat/test-simulate_data.R b/tests/testthat/test-simulate_data.R new file mode 100644 index 00000000..5ea9219f --- /dev/null +++ b/tests/testthat/test-simulate_data.R @@ -0,0 +1,38 @@ +test_that("covariance_matrix works", { + result <- covariance_matrix(c(1:4), c(1, 0, 0, 0, 0, 0)) + expected <- matrix( + c( + 1, 1, 0, 0, + 1, 2, 0, 0, + 0, 0, 3, 0, + 0, 0, 0, 4 + ), + nrow = 4 + ) + + expect_equal(result, expected) +}) + +test_that("covariance_matrix finds positive definite matrices", { + expect_warning( + result <- covariance_matrix(c(1:4), c(1, 4, -2, 1, 0, 0)), + "Finding nearest positive definite matrix" + ) + expected <- matrix( + c( + 1, -0.173717259157214, 1.55477286166658, 0.606677145265074, + -0.173717259157214, 2, -1.25191988052734, 0.120331114550728, + 1.55477286166658, -1.25191988052734, 3, 0.250687992911699, + 0.606677145265074, 0.120331114550728, 0.250687992911699, 4 + ), + nrow = 4 + ) + + expect_equal(result, expected) +}) + + +test_that("covariance_matrix fails with incompatible arguments", { + expect_error(covariance_matrix(c(1:3), c(0, 0))) + expect_error(covariance_matrix(c("a", "b", "c"))) +}) diff --git a/vignettes/data_simulation.Rmd b/vignettes/data_simulation.Rmd new file mode 100644 index 00000000..c6e599e4 --- /dev/null +++ b/vignettes/data_simulation.Rmd @@ -0,0 +1,104 @@ +--- +title: "8. Data Simulation" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{8. Data Simulation} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +options("max.print" = 20) +``` + +This vignette will show the internal functions for generating data that can be used in simulation studies. + +```{r setup} +library(psborrow2) +``` + +## Generating Baseline Data + +First we define how to generate baseline data for our study. In its simplest form we only need to define how many +patients are in the arms: treated internal, control internal, control external. + +```{r} +simple_baseline <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100 +) +``` + +This object defines how the data will be created. To actually generate a sample, use `generate()` to produce a +`data.frame`. + +```{r} +generate(simple_baseline) +``` + +Often we are interested in some additional covariates. Internally the package uses multivariate normal distributions, +so we need to specify the mean and (co-)variance. We have the option to specify different distribution parameters +for the internal and external arms. The internal arms are assumed to be randomized and therefore come from the same +distribution. If no external parameters are specified, the internal ones are used for both. + +```{r} +with_age <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100, + covariates = baseline_covariates( + names = "age", + means_int = 55, + covariance_int = covariance_matrix(5) + ) +) +generate(with_age) +``` + +In a more complex setting, we can generate correlated baseline covariates. +```{r} +with_age_score <- create_baseline_object( + n_trt_int = 100, + n_ctrl_int = 50, + n_ctrl_ext = 100, + covariates = baseline_covariates( + names = c("age", "score"), + means_int = c(55, 5), + means_ext = c(60, 5), + covariance_int = covariance_matrix(c(5, 1)) + ) +) +data_age_score <- generate(with_age_score) +``` + +```{r} +plot( + x = data_age_score$age, + y = data_age_score$score, + col = data_age_score$trt + 1, + pch = data_age_score$ext * 16, + xlab = "Age", + ylab = "Score" +) +legend( + "topright", + legend = c("internal treated", "internal control", "external control"), + col = c(2, 1, 1), + pch = c(0, 0, 16) +) +``` + +Finally, we have the option to transform non-normal covariates, such as with binary cut-offs. +These can be added to existing objects and can use built-in functions or used defined. +```{r} +with_age_score@transformations <- list( + score_high = binary_cutoff("score", int_cutoff = 0.7, ext_cutoff = 0.7), + score_rounded = function(data) round(data$score) +) +generate(with_age_score) +``` diff --git a/vignettes/propensity_scores.Rmd b/vignettes/propensity_scores.Rmd index fb8d5e36..5d93a451 100644 --- a/vignettes/propensity_scores.Rmd +++ b/vignettes/propensity_scores.Rmd @@ -27,78 +27,11 @@ library(psborrow2) ``` Propensity scores (PS) methods offer various ways to adjust analyses for differences in groups of patients. - @austin2013 discusses various approaches for using PS with survival analyses to obtain effect measures similar to -randomized controlled trials. - -@wang2021 discuss using PS for IPTW, matching and stratification in combination with a Bayesian analysis. These methods -allow for the separation of the design and analysis into two stages, which may be attractive in a regulatory setting. - +randomized controlled trials. @wang2021 discuss using PS for IPTW, matching and stratification in combination with a Bayesian analysis. +These methods allow for the separation of the design and analysis into two stages, which may be attractive in a regulatory setting. Another approach is the direct inclusion of the PS as a covariate in the outcome model. -## Propensity Score Calculation - -Propensity scores are the probabilities that an observation belongs to one group or another, in this setting the -internal or external patients. The PS models are fitted based on baseline covariates that could be confounders between -outcome and being in the internal/external group. - -The simplest approach for calculating PS is a logistic generalized linear model. First, let's load some demo data from -`psborrow2`. - -```{r data} -example_dataframe <- as.data.frame(example_matrix) -example_dataframe$int <- 1 - example_dataframe$ext -``` - -Let's now fit a logistic generalized linear model and add the PS as an additional covariate. - -```{r glm} -ps_model <- glm(int ~ cov1 + cov2 + cov3 + cov4, - data = example_dataframe, - family = binomial -) -ps <- predict(ps_model, type = "response") -example_dataframe$ps <- ps - -example_dataframe$ps_cat_ <- cut( - example_dataframe$ps, - breaks = 5, - include.lowest = TRUE -) -levels(example_dataframe$ps_cat_) <- c( - "ref", "low", - "low_med", "high_med", "high" -) -``` - -```{r include = FALSE} -example_matrix_ps <- create_data_matrix( - example_dataframe, - outcome = c("time", "cnsr"), - trt_flag_col = "trt", - ext_flag_col = "ext", - covariates = ~ ps_cat_ + ps -) -``` - -The calculated PS can be included as a covariate in the analysis object, for the "covariate adjustment" PS approach: - -```{r analysis_obj} -analysis_ps_cuts <- create_analysis_obj( - data_matrix = example_matrix_ps, - covariates = add_covariates( - c("ps_cat_low", "ps_cat_low_med", "ps_cat_high_med", "ps_cat_high"), - priors = normal_prior(0, 10000) - ), - outcome = exp_surv_dist("time", "cnsr", normal_prior(0, 10000)), - borrowing = borrowing_details("Full borrowing", "ext"), - treatment = treatment_details("trt", normal_prior(0, 10000)) -) - -result_ps_cuts <- mcmc_sample(analysis_ps_cuts) -``` - - ## Alternative PS Weights with `WeightIt` The [`WeightIt`](https://cran.r-project.org/package=WeightIt) package can calculate PS and other balancing @@ -109,6 +42,9 @@ In addition, weights can be calculated differently for different estimands. Here ```{r WeightIt} library(WeightIt) +example_dataframe <- as.data.frame(example_matrix) +example_dataframe$int <- 1 - example_dataframe$ext + weightit_model <- weightit( int ~ cov1 + cov2 + cov3 + cov4, data = example_dataframe, @@ -128,6 +64,12 @@ library(cobalt) bal.plot(weightit_model) ``` +"Love plots" can also be generated that compare the populations before and after weighting: + +```{r loveplot} +love.plot(weightit_model, stars = "std") +``` + The PS values can be copied into the data set and the analysis object can be constructed as before. ```{r covs}