From 0b2d5536dc2c4a35b2553edbcbbe62031fac3bb5 Mon Sep 17 00:00:00 2001 From: njtierney Date: Thu, 10 Oct 2024 18:42:39 +1100 Subject: [PATCH 1/4] first pass at splitting up check_mvn_samples and check_samples into smaller functions --- tests/testthat/helpers.R | 47 ++++++++++++++++++++++++-------- tests/testthat/test_posteriors.R | 33 ++++++++++++++++++---- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 7472208d..7528eac9 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -834,7 +834,7 @@ check_mvn_samples <- function(sampler, n_effective = 3000) { # away from truth. There's a 1/100 chance of any one of these scaled errors # being greater than qnorm(0.99) if the sampler is correct errors <- scaled_error(stat_draws, stat_truth) - expect_lte(max(errors), stats::qnorm(0.99)) + errors } # sample values of greta array 'x' (which must follow a distribution), and @@ -864,19 +864,44 @@ check_samples <- function( iid_samples <- iid_function(neff) mcmc_samples <- as.matrix(draws) - # plot - if (is.null(title)) { - distrib <- get_node(x)$distribution$distribution_name - sampler_name <- class(sampler)[1] - title <- paste(distrib, "with", sampler_name) - } + # # plot + # if (is.null(title)) { + # distrib <- get_node(x)$distribution$distribution_name + # sampler_name <- class(sampler)[1] + # title <- paste(distrib, "with", sampler_name) + # } - stats::qqplot(mcmc_samples, iid_samples, main = title) - graphics::abline(0, 1) + # stats::qqplot(mcmc_samples, iid_samples, main = title) + # graphics::abline(0, 1) # do a formal hypothesis test - suppressWarnings(stat <- ks.test(mcmc_samples, iid_samples)) - testthat::expect_gte(stat$p.value, 0.01) + # suppressWarnings(stat <- ks.test(mcmc_samples, iid_samples)) + # testthat::expect_gte(stat$p.value, 0.01) + + list( + mcmc_samples = mcmc_samples, + iid_samples = iid_samples, + distrib = get_node(x)$distribution$distribution_name, + sampler_name = class(sampler)[1] + ) +} + +qqplot_checked_samples <- function(checked_samples, title){ + + distrib <- checked_samples$distrib + sampler_name <- checked_samples$sampler_name + title <- paste(distrib, "with", sampler_name) + + mcmc_samples <- checked_samples$mcmc_samples + iid_samples <- checked_samples$iid_samples + + stats::qqplot( + x = mcmc_samples, + y = iid_samples, + main = title + ) + + graphics::abline(0, 1) } ## helpers for looping through optimisers diff --git a/tests/testthat/test_posteriors.R b/tests/testthat/test_posteriors.R index 173e539f..865d019e 100644 --- a/tests/testthat/test_posteriors.R +++ b/tests/testthat/test_posteriors.R @@ -38,9 +38,14 @@ test_that("samplers are unbiased for bivariate normals", { skip_if_not_release() - check_mvn_samples(hmc()) - check_mvn_samples(rwmh()) - check_mvn_samples(slice()) + hmc_mvn_samples <- check_mvn_samples(hmc()) + expect_lte(max(hmc_mvn_samples), stats::qnorm(0.99)) + + rwmh_mvn_samples <- check_mvn_samples(rwmh()) + expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) + + slice_mvn_samples <- check_mvn_samples(slice()) + expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) }) test_that("samplers are unbiased for chi-squared", { @@ -52,7 +57,17 @@ test_that("samplers are unbiased for chi-squared", { x <- chi_squared(df) iid <- function(n) rchisq(n, df) - check_samples(x, iid) + chi_squared_checked <- check_samples(x = x, + iid_function = iid, + sampler = hmc()) + + # do the plotting + qqplot_checked_samples(chi_squared_checked) + + # do a formal hypothesis test + suppressWarnings(stat <- ks.test(chi_squared_checked$mcmc_samples, + chi_squared_checked$iid_samples)) + expect_gte(stat$p.value, 0.01) }) ## TF1/2 this sampler fails @@ -64,8 +79,16 @@ test_that("samplers are unbiased for standard uniform", { x <- uniform(0, 1) iid <- runif - check_samples(x, iid) + runif_checked <- check_samples(x, iid) + + qqplot_checked_samples(runif_checked) + + # do a formal hypothesis test + suppressWarnings(stat <- ks.test(chi_squared_checked$mcmc_samples, + chi_squared_checked$iid_samples)) + expect_gte(stat$p.value, 0.01) }) +## UP TO HERE ## This one is failing test_that("samplers are unbiased for LKJ", { skip_if_not(check_tf_version()) From f5a7303ab592ea0a9524fbb0ff7242b1aea4ac3f Mon Sep 17 00:00:00 2001 From: njtierney Date: Tue, 15 Oct 2024 10:54:46 +1100 Subject: [PATCH 2/4] add a helper function for doing Kolmogorov-Smirnov test on mcmc samples vs iid samples --- tests/testthat/helpers.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 7528eac9..3f7ba5ef 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -904,6 +904,14 @@ qqplot_checked_samples <- function(checked_samples, title){ graphics::abline(0, 1) } +## helpers for running Kolmogorov-Smirnov test for MCMC samples vs IID samples +ks_test_mcmc_vs_iid <- function(checked_samples){ + # do a formal hypothesis test + suppressWarnings(stat <- ks.test(checked_samples$mcmc_samples, + checked_samples$iid_samples)) + stat +} + ## helpers for looping through optimisers run_opt <- function( m, From 039d420c770a33f57645ede950b67e1750db5738 Mon Sep 17 00:00:00 2001 From: njtierney Date: Tue, 15 Oct 2024 10:56:32 +1100 Subject: [PATCH 3/4] split test_posteriors up into separate test files. * Makes them easier to turn on/off * Easier to track time taken for each test * Have now set "binomial", "chi_squared", and "standard_uniform" all to run on CI tests, as these run quite quickly (less than 10s). * This should give us a bit more expanded testing for greta without sacrificing too much --- tests/testthat/test_posteriors.R | 186 ------------------ tests/testthat/test_posteriors_binomial.R | 34 ++++ .../test_posteriors_bivariate_normal.R | 17 ++ tests/testthat/test_posteriors_chi_squared.R | 23 +++ tests/testthat/test_posteriors_geweke.R | 66 +++++++ tests/testthat/test_posteriors_lkj.R | 28 +++ .../test_posteriors_standard_uniform.R | 20 ++ tests/testthat/test_posteriors_wishart.R | 31 +++ 8 files changed, 219 insertions(+), 186 deletions(-) delete mode 100644 tests/testthat/test_posteriors.R create mode 100644 tests/testthat/test_posteriors_binomial.R create mode 100644 tests/testthat/test_posteriors_bivariate_normal.R create mode 100644 tests/testthat/test_posteriors_chi_squared.R create mode 100644 tests/testthat/test_posteriors_geweke.R create mode 100644 tests/testthat/test_posteriors_lkj.R create mode 100644 tests/testthat/test_posteriors_standard_uniform.R create mode 100644 tests/testthat/test_posteriors_wishart.R diff --git a/tests/testthat/test_posteriors.R b/tests/testthat/test_posteriors.R deleted file mode 100644 index 865d019e..00000000 --- a/tests/testthat/test_posteriors.R +++ /dev/null @@ -1,186 +0,0 @@ -Sys.setenv("RELEASE_CANDIDATE" = "false") -test_that("posterior is correct (binomial)", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - # analytic solution to the posterior of the paramter of a binomial - # distribution, with uniform prior - n <- 100 - pos <- rbinom(1, n, runif(1)) - theta <- uniform(0, 1) - distribution(pos) <- binomial(n, theta) - m <- model(theta) - - draws <- get_enough_draws(m, hmc(), 2000, verbose = FALSE) - - samples <- as.matrix(draws) - - # analytic solution to posterior is beta(1 + pos, 1 + N - pos) - shape1 <- 1 + pos - shape2 <- 1 + n - pos - - # qq plot against true quantiles - quants <- (1:99) / 100 - q_target <- qbeta(quants, shape1, shape2) - q_est <- quantile(samples, quants) - plot(q_target ~ q_est, main = "binomial posterior") - abline(0, 1) - - n_draws <- round(coda::effectiveSize(draws)) - comparison <- rbeta(n_draws, shape1, shape2) - suppressWarnings(test <- ks.test(samples, comparison)) - expect_gte(test$p.value, 0.01) -}) - -test_that("samplers are unbiased for bivariate normals", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - hmc_mvn_samples <- check_mvn_samples(hmc()) - expect_lte(max(hmc_mvn_samples), stats::qnorm(0.99)) - - rwmh_mvn_samples <- check_mvn_samples(rwmh()) - expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) - - slice_mvn_samples <- check_mvn_samples(slice()) - expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) -}) - -test_that("samplers are unbiased for chi-squared", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - df <- 5 - x <- chi_squared(df) - iid <- function(n) rchisq(n, df) - - chi_squared_checked <- check_samples(x = x, - iid_function = iid, - sampler = hmc()) - - # do the plotting - qqplot_checked_samples(chi_squared_checked) - - # do a formal hypothesis test - suppressWarnings(stat <- ks.test(chi_squared_checked$mcmc_samples, - chi_squared_checked$iid_samples)) - expect_gte(stat$p.value, 0.01) -}) - -## TF1/2 this sampler fails -test_that("samplers are unbiased for standard uniform", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - x <- uniform(0, 1) - iid <- runif - - runif_checked <- check_samples(x, iid) - - qqplot_checked_samples(runif_checked) - - # do a formal hypothesis test - suppressWarnings(stat <- ks.test(chi_squared_checked$mcmc_samples, - chi_squared_checked$iid_samples)) - expect_gte(stat$p.value, 0.01) -}) -## UP TO HERE -## This one is failing -test_that("samplers are unbiased for LKJ", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - x <- lkj_correlation(3, 2)[1, 2] - iid <- function(n) { - rlkjcorr(n, 3, 2)[, 1, 2] - } - - check_samples(x, iid, hmc(), one_by_one = TRUE) -}) - -test_that("samplers are unbiased for Wishart", { - skip_if_not(check_tf_version()) - - skip_if_not_release() - - sigma <- matrix( - c(1.2, 0.7, 0.7, 2.3), - 2, 2 - ) - df <- 4 - x <- wishart(df, sigma)[1, 2] - iid <- function(n) { - rWishart(n, df, sigma)[1, 2, ] - } - - check_samples(x, iid, one_by_one = TRUE) -}) -## TF1/2 - method for this test needs to be updated for TF2 -## Passing this along to version 0.6.0 now -# test_that("samplers pass geweke tests", { -# skip_if_not(check_tf_version()) -# -# skip_if_not_release() -# -# # nolint start -# # run geweke tests on this model: -# # theta ~ normal(mu1, sd1) -# # x[i] ~ normal(theta, sd2) -# # for i in N -# # nolint end -# -# n <- 10 -# mu1 <- rnorm(1, 0, 3) -# sd1 <- rlnorm(1) -# sd2 <- rlnorm(1) -# -# # prior (n draws) -# p_theta <- function(n) { -# rnorm(n, mu1, sd1) -# } -# -# # likelihood -# p_x_bar_theta <- function(theta) { -# rnorm(n, theta, sd2) -# } -# -# # define the greta model (single precision for slice sampler) -# x <- as_data(rep(0, n)) -# greta_theta <- normal(mu1, sd1) -# distribution(x) <- normal(greta_theta, sd2) -# model <- model(greta_theta, precision = "single") -# -# # run tests on all available samplers -# check_geweke( -# sampler = hmc(), -# model = model, -# data = x, -# p_theta = p_theta, -# p_x_bar_theta = p_x_bar_theta, -# title = "HMC Geweke test" -# ) -# -# check_geweke( -# sampler = rwmh(), -# model = model, -# data = x, -# p_theta = p_theta, -# p_x_bar_theta = p_x_bar_theta, -# warmup = 2000, -# title = "RWMH Geweke test" -# ) -# -# check_geweke( -# sampler = slice(), -# model = model, -# data = x, -# p_theta = p_theta, -# p_x_bar_theta = p_x_bar_theta, -# title = "slice sampler Geweke test" -# ) -# }) diff --git a/tests/testthat/test_posteriors_binomial.R b/tests/testthat/test_posteriors_binomial.R new file mode 100644 index 00000000..7ad3a2ca --- /dev/null +++ b/tests/testthat/test_posteriors_binomial.R @@ -0,0 +1,34 @@ +Sys.setenv("RELEASE_CANDIDATE" = "true") +test_that("posterior is correct (binomial)", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + # analytic solution to the posterior of the paramter of a binomial + # distribution, with uniform prior + n <- 100 + pos <- rbinom(1, n, runif(1)) + theta <- uniform(0, 1) + distribution(pos) <- binomial(n, theta) + m <- model(theta) + + draws <- get_enough_draws(m, hmc(), 2000, verbose = FALSE) + + samples <- as.matrix(draws) + + # analytic solution to posterior is beta(1 + pos, 1 + N - pos) + shape1 <- 1 + pos + shape2 <- 1 + n - pos + + # qq plot against true quantiles + quants <- (1:99) / 100 + q_target <- qbeta(quants, shape1, shape2) + q_est <- quantile(samples, quants) + plot(q_target ~ q_est, main = "binomial posterior") + abline(0, 1) + + n_draws <- round(coda::effectiveSize(draws)) + comparison <- rbeta(n_draws, shape1, shape2) + suppressWarnings(test <- ks.test(samples, comparison)) + expect_gte(test$p.value, 0.01) +}) diff --git a/tests/testthat/test_posteriors_bivariate_normal.R b/tests/testthat/test_posteriors_bivariate_normal.R new file mode 100644 index 00000000..b3281487 --- /dev/null +++ b/tests/testthat/test_posteriors_bivariate_normal.R @@ -0,0 +1,17 @@ +# Currently takes about 30 seconds on an M1 mac +Sys.setenv("RELEASE_CANDIDATE" = "false") + +test_that("samplers are unbiased for bivariate normals", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + hmc_mvn_samples <- check_mvn_samples(sampler = hmc()) + expect_lte(max(hmc_mvn_samples), stats::qnorm(0.99)) + + rwmh_mvn_samples <- check_mvn_samples(sampler = rwmh()) + expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) + + slice_mvn_samples <- check_mvn_samples(sampler = slice()) + expect_lte(max(rwmh_mvn_samples), stats::qnorm(0.99)) +}) diff --git a/tests/testthat/test_posteriors_chi_squared.R b/tests/testthat/test_posteriors_chi_squared.R new file mode 100644 index 00000000..887ccb1d --- /dev/null +++ b/tests/testthat/test_posteriors_chi_squared.R @@ -0,0 +1,23 @@ +Sys.setenv("RELEASE_CANDIDATE" = "true") + +test_that("samplers are unbiased for chi-squared", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + df <- 5 + x <- chi_squared(df) + iid <- function(n) rchisq(n, df) + + chi_squared_checked <- check_samples(x = x, + iid_function = iid, + sampler = hmc()) + + # do the plotting + qqplot_checked_samples(chi_squared_checked) + + # do a formal hypothesis test + stat <- ks_test_mcmc_vs_iid(chi_squared_checked) + + expect_gte(stat$p.value, 0.01) +}) diff --git a/tests/testthat/test_posteriors_geweke.R b/tests/testthat/test_posteriors_geweke.R new file mode 100644 index 00000000..61e0a9ea --- /dev/null +++ b/tests/testthat/test_posteriors_geweke.R @@ -0,0 +1,66 @@ +Sys.setenv("RELEASE_CANDIDATE" = "false") + +## TF1/2 - method for this test needs to be updated for TF2 +## See https://github.com/greta-dev/greta/issues/720 +test_that("samplers pass geweke tests", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + # nolint start + # run geweke tests on this model: + # theta ~ normal(mu1, sd1) + # x[i] ~ normal(theta, sd2) + # for i in N + # nolint end + + n <- 10 + mu1 <- rnorm(1, 0, 3) + sd1 <- rlnorm(1) + sd2 <- rlnorm(1) + + # prior (n draws) + p_theta <- function(n) { + rnorm(n, mu1, sd1) + } + + # likelihood + p_x_bar_theta <- function(theta) { + rnorm(n, theta, sd2) + } + + # define the greta model (single precision for slice sampler) + x <- as_data(rep(0, n)) + greta_theta <- normal(mu1, sd1) + distribution(x) <- normal(greta_theta, sd2) + model <- model(greta_theta, precision = "single") + + # run tests on all available samplers + check_geweke( + sampler = hmc(), + model = model, + data = x, + p_theta = p_theta, + p_x_bar_theta = p_x_bar_theta, + title = "HMC Geweke test" + ) + + check_geweke( + sampler = rwmh(), + model = model, + data = x, + p_theta = p_theta, + p_x_bar_theta = p_x_bar_theta, + warmup = 2000, + title = "RWMH Geweke test" + ) + + check_geweke( + sampler = slice(), + model = model, + data = x, + p_theta = p_theta, + p_x_bar_theta = p_x_bar_theta, + title = "slice sampler Geweke test" + ) +}) diff --git a/tests/testthat/test_posteriors_lkj.R b/tests/testthat/test_posteriors_lkj.R new file mode 100644 index 00000000..2ec2c632 --- /dev/null +++ b/tests/testthat/test_posteriors_lkj.R @@ -0,0 +1,28 @@ +Sys.setenv("RELEASE_CANDIDATE" = "false") + +## This one is failing +test_that("samplers are unbiased for LKJ", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + x <- lkj_correlation(3, 2)[1, 2] + iid <- function(n) { + rlkjcorr(n, 3, 2)[, 1, 2] + } + + lkj_checked <- check_samples( + x = x, + iid_function = iid, + sampler = hmc(), + one_by_one = TRUE + ) + + # do the plotting + qqplot_checked_samples(lkj_checked) + + # do a formal hypothesis test + stat <- ks_test_mcmc_vs_iid(lkj_checked) + + expect_gte(stat$p.value, 0.01) +}) diff --git a/tests/testthat/test_posteriors_standard_uniform.R b/tests/testthat/test_posteriors_standard_uniform.R new file mode 100644 index 00000000..80d29674 --- /dev/null +++ b/tests/testthat/test_posteriors_standard_uniform.R @@ -0,0 +1,20 @@ +Sys.setenv("RELEASE_CANDIDATE" = "true") + +## TF1/2 this sampler fails +test_that("samplers are unbiased for standard uniform", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + x <- uniform(0, 1) + iid <- runif + + runif_checked <- check_samples(x, iid) + + qqplot_checked_samples(runif_checked) + + # do a formal hypothesis test + stat <- ks_test_mcmc_vs_iid(runif_checked) + + expect_gte(stat$p.value, 0.01) +}) diff --git a/tests/testthat/test_posteriors_wishart.R b/tests/testthat/test_posteriors_wishart.R new file mode 100644 index 00000000..2802dd04 --- /dev/null +++ b/tests/testthat/test_posteriors_wishart.R @@ -0,0 +1,31 @@ +Sys.setenv("RELEASE_CANDIDATE" = "false") + +test_that("samplers are unbiased for Wishart", { + skip_if_not(check_tf_version()) + + skip_if_not_release() + + sigma <- matrix( + c(1.2, 0.7, 0.7, 2.3), + 2, 2 + ) + df <- 4 + x <- wishart(df, sigma)[1, 2] + iid <- function(n) { + rWishart(n, df, sigma)[1, 2, ] + } + + wishart_checked <- check_samples( + x = x, + iid_function = iid, + one_by_one = TRUE + ) + + # do the plotting + qqplot_checked_samples(wishart_checked) + + # do a formal hypothesis test + stat <- ks_test_mcmc_vs_iid(wishart_checked) + + expect_gte(stat$p.value, 0.01) +}) From 1bf0946fad37313455d79631d20fcaed9d36dba8 Mon Sep 17 00:00:00 2001 From: njtierney Date: Wed, 16 Oct 2024 13:35:12 +1100 Subject: [PATCH 4/4] * add argument names in test_posteriors_wishart.R * update test_seed.R to use a slightly different machine precision via expect_equal rather than expect_idential --- tests/testthat/test_posteriors_wishart.R | 8 ++++++-- tests/testthat/test_seed.R | 8 +++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_posteriors_wishart.R b/tests/testthat/test_posteriors_wishart.R index 2802dd04..1c4e7175 100644 --- a/tests/testthat/test_posteriors_wishart.R +++ b/tests/testthat/test_posteriors_wishart.R @@ -6,11 +6,15 @@ test_that("samplers are unbiased for Wishart", { skip_if_not_release() sigma <- matrix( - c(1.2, 0.7, 0.7, 2.3), - 2, 2 + data = c(1.2, 0.7, 0.7, 2.3), + nrow = 2, + ncol = 2 ) + df <- 4 + x <- wishart(df, sigma)[1, 2] + iid <- function(n) { rWishart(n, df, sigma)[1, 2, ] } diff --git a/tests/testthat/test_seed.R b/tests/testthat/test_seed.R index cb5c30a8..611ab871 100644 --- a/tests/testthat/test_seed.R +++ b/tests/testthat/test_seed.R @@ -38,12 +38,10 @@ test_that("when calculate simulates multiple values, they are calculated using t skip_if_not(check_tf_version()) x <- normal(0, 1) - x_squared <- x ^ 2 + x_2 <- x*1 - vals <- calculate(x, x_squared, nsim = 10) - - # use expect_equal - expect_identical(vals$x ^ 2, vals$x_squared) + vals <- calculate(x, x_2, nsim = 10) + expect_equal(vals$x, vals$x_2) }) test_that("calculate produces the right number of samples", {