Skip to content

Commit

Permalink
Merge pull request #37 from adrian-lison/develop
Browse files Browse the repository at this point in the history
Automatic distribution length choice
  • Loading branch information
adrian-lison authored Sep 20, 2024
2 parents 49de762 + 993c0b7 commit 7db58ab
Show file tree
Hide file tree
Showing 24 changed files with 132 additions and 77 deletions.
57 changes: 51 additions & 6 deletions R/utils_dists.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ get_gamma_sd_alternative <- function(gamma_shape, gamma_scale) {
#' @param gamma_cv Alternative parameterization: Coefficient of variation of the
#' Gamma distribution.
#' @param maxX Right truncation point. All probability mass beyond `maxX` will
#' be assigned to `maxX`.
#' be assigned to `maxX`. If `NULL` (default), this is automatically chosen
#' such that the last bin has less than 0.5% of the probability mass.
#' @param include_zero Should the distribution explicitly cover X=0, or should
#' X=1 include the probability mass for X=0 too?
#' @param print_params Should the shape and rate parameters be printed?
Expand All @@ -68,7 +69,7 @@ get_discrete_gamma <- function(gamma_shape,
gamma_mean,
gamma_sd,
gamma_cv,
maxX,
maxX = NULL,
include_zero = TRUE,
print_params = FALSE) {
if (missing(gamma_shape)) {
Expand Down Expand Up @@ -97,6 +98,24 @@ get_discrete_gamma <- function(gamma_shape,
# shortest period (combines periods 0 and 1)
shortest <- pgamma(2, shape = gamma_shape, rate = gamma_rate)
# longest period (combines all periods >= maxX)
if (is.null(maxX)) {
maxX <- which(sapply(1:100, function(maxX) {
(1 - pgamma(maxX, shape = gamma_shape, rate = gamma_rate)) < 0.005
}))[1]
if (is.na(maxX)) {
maxX <- 100
cli::cli_inform(c(
"!" = paste0("Maximum length of distribution was set to 100. ",
"The last bin covers ",
round(
(1 - pgamma(maxX, shape = gamma_shape, rate = gamma_rate)),
4
),
"% of the probability mass."
)
))
}
}
longest <- (1 - pgamma(maxX, shape = gamma_shape, rate = gamma_rate))

if (include_zero) {
Expand Down Expand Up @@ -138,13 +157,20 @@ get_discrete_gamma <- function(gamma_shape,
#' @param gamma_mean Mean of the shifted Gamma distribution.
#' @param gamma_sd Standard deviation of the shifted Gamma distribution.
#' @param maxX Right truncation point. All probability mass beyond `maxX` will
#' be assigned to `maxX`.
#' be assigned to `maxX`. If `NULL` (default), this is automatically chosen
#' such that the last bin has approximately less than 0.5% of the probability
#' mass.
#'
#' @return A numeric vector representing the PMF of the discretized shifted
#' Gamma distribution.
#' @export
get_discrete_gamma_shifted <- function(
gamma_mean, gamma_sd, maxX = 10) {
gamma_mean, gamma_sd, maxX = NULL) {
maxX <- 1 + length(get_discrete_gamma(
gamma_mean = gamma_mean,
gamma_sd = gamma_sd,
maxX = maxX
))
k <- 1:maxX
if (gamma_sd < 0) {
stop("gamma_sd must be >=0.")
Expand Down Expand Up @@ -265,15 +291,16 @@ get_lognormal_sigma_alternative <- function(mu, unit_mean = NULL, unit_sd = NULL
#' @param unit_cv Alternative parameterization: unit/natural scale coefficient
#' of variation.
#' @param maxX Right truncation point. All probability mass beyond `maxX` will
#' be assigned to `maxX`.
#' be assigned to `maxX`. If `NULL` (default), this is automatically chosen
#' such that the last bin has less than 0.5% of the probability mass.
#' @param include_zero Should the distribution explicitly cover X=0, or should
#' X=1 include the probability mass for X=0 too?
#' @param print_params Should the log-level parameters be printed?
#'
#' @return A numeric vector representing the PMF of the discretized lognormal.
#' @export
get_discrete_lognormal <- function(
meanlog, sdlog, unit_mean = NULL, unit_sd = NULL, unit_cv = NULL, maxX, include_zero = TRUE,
meanlog, sdlog, unit_mean = NULL, unit_sd = NULL, unit_cv = NULL, maxX = NULL, include_zero = TRUE,
print_params = FALSE) {
if (!is.null(unit_mean) && (!is.null(unit_sd) || !is.null(unit_cv))) {
if (is.null(unit_sd)) {
Expand All @@ -286,6 +313,24 @@ get_discrete_lognormal <- function(
# shortest period (combines periods 0 and 1)
shortest <- plnorm(2, meanlog = meanlog, sdlog = sdlog)
# longest period (combines all periods >= maxX)
if (is.null(maxX)) {
maxX <- which(sapply(1:100, function(maxX) {
(1 - plnorm(maxX, meanlog = meanlog, sdlog = sdlog)) < 0.005
}))[1]
if (is.na(maxX)) {
maxX <- 100
cli::cli_inform(c(
"!" = paste0("Maximum length of distribution was set to 100. ",
"The last bin covers ",
round(
(1 - plnorm(maxX, meanlog = meanlog, sdlog = sdlog)),
4
),
"of the probability mass."
)
))
}
}
longest <- (1 - plnorm(maxX, meanlog = meanlog, sdlog = sdlog))

if (include_zero) {
Expand Down
8 changes: 4 additions & 4 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,10 @@ discretized versions of popular continuous probability distributions.

```{r}
ww_assumptions <- sewer_assumptions(
generation_dist = get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12),
shedding_dist = get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397, maxX = 30),
generation_dist = get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4),
shedding_dist = get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397),
shedding_reference = "symptom_onset", # shedding load distribution is relative to symptom onset
incubation_dist = get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4, maxX = 10),
incubation_dist = get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4),
)
```

Expand All @@ -224,7 +224,7 @@ Now that we have the data and necessary assumptions, we can use `EpiSewer` to es
Stan regularly provides updates about the progress of the sampler. The overall
runtime will depend on your hardware resources, the size of the data,
the complexity of the model used, and how well the model actually fits the data.
On a MacBook Pro (2 GHz Quad-Core Intel Core i5) the example below takes about 4 minutes to run.
On a MacBook Pro (2 GHz Quad-Core Intel Core i5) the example below takes about 5 minutes to run.
```{r, eval = FALSE}
options(mc.cores = 4) # allow stan to use 4 cores, i.e. one for each chain
ww_result <- EpiSewer(
Expand Down
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,10 @@ probability distributions.

``` r
ww_assumptions <- sewer_assumptions(
generation_dist = get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12),
shedding_dist = get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397, maxX = 30),
generation_dist = get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4),
shedding_dist = get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397),
shedding_reference = "symptom_onset", # shedding load distribution is relative to symptom onset
incubation_dist = get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4, maxX = 10),
incubation_dist = get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4),
)
```

Expand All @@ -299,7 +299,7 @@ Stan regularly provides updates about the progress of the sampler. The
overall runtime will depend on your hardware resources, the size of the
data, the complexity of the model used, and how well the model actually
fits the data. On a MacBook Pro (2 GHz Quad-Core Intel Core i5) the
example below takes about 4 minutes to run.
example below takes about 5 minutes to run.

``` r
options(mc.cores = 4) # allow stan to use 4 cores, i.e. one for each chain
Expand Down Expand Up @@ -547,11 +547,11 @@ number.
``` r
head(ww_result$summary$R, 5)
#> date mean median lower_0.95 lower_0.5 upper_0.5 upper_0.95
#> 1: 2021-12-06 1.049711 1.044085 0.7324020 0.9372597 1.152850 1.395459
#> 2: 2021-12-07 1.050463 1.042825 0.7470177 0.9427265 1.150043 1.380040
#> 3: 2021-12-08 1.051356 1.041470 0.7572534 0.9485155 1.150755 1.377987
#> 4: 2021-12-09 1.052399 1.044530 0.7605042 0.9530230 1.149892 1.369603
#> 5: 2021-12-10 1.053602 1.047620 0.7645480 0.9585617 1.151843 1.366598
#> 1: 2021-12-03 1.042238 1.044795 0.7012397 0.9294330 1.157058 1.365061
#> 2: 2021-12-04 1.043177 1.044975 0.7155260 0.9338175 1.155830 1.355124
#> 3: 2021-12-05 1.044297 1.045450 0.7303710 0.9367903 1.154685 1.346828
#> 4: 2021-12-06 1.045626 1.045795 0.7430955 0.9419335 1.152473 1.342926
#> 5: 2021-12-07 1.047189 1.046505 0.7521539 0.9480593 1.147990 1.336999
#> type seeding
#> 1: estimate TRUE
#> 2: estimate TRUE
Expand All @@ -575,7 +575,7 @@ ww_result$fitted$diagnostic_summary()
#> [1] 0 0 0 0
#>
#> $ebfmi
#> [1] 1.0296476 0.9815957 1.0001229 0.8657642
#> [1] 0.9014503 0.9680378 0.9334176 0.9486877
```

Finally, the `checksums` attribute gives us several checksums that
Expand All @@ -590,7 +590,7 @@ ww_result$checksums
#> [1] "6346549bd7c2ac9a9d200503d7356a29"
#>
#> $input
#> [1] "fd1bc664ef6320ad9f8427d3a0f3d18c"
#> [1] "c6dad2f8ed2b6fdaf952926f3eb3e97d"
#>
#> $fit_opts
#> [1] "5309bbbc3cd1cc109eac60d2fc82de45"
Expand All @@ -599,5 +599,5 @@ ww_result$checksums
#> [1] "e92f83d0ca5d22b3bb5849d62c5412ee"
#>
#> $init
#> [1] "30505348d747504aa36fe8fb6bdfaaf3"
#> [1] "82126b86311c9716bda5d21c04c52142"
```
4 changes: 2 additions & 2 deletions data-raw/Influenza Zurich.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ measurements <- measurements[date <= as.Date("2022-12-19")]
ww_data_influenza_Zurich <- sewer_data(measurements = measurements, flows = Influenza_A_Zurich$flows)
usethis::use_data(ww_data_influenza_Zurich, overwrite = TRUE)

generation_dist <- get_discrete_gamma_shifted(gamma_mean = 2.6, gamma_sd = 1.5, maxX = 8)
shedding_dist <- get_discrete_gamma(gamma_mean = 2.491217, gamma_sd = 1.004283, maxX = 6)
generation_dist <- get_discrete_gamma_shifted(gamma_mean = 2.6, gamma_sd = 1.5)
shedding_dist <- get_discrete_gamma(gamma_mean = 2.491217, gamma_sd = 1.004283)
ww_assumptions_influenza_Zurich <- sewer_assumptions(
generation_dist = generation_dist,
shedding_dist = shedding_dist,
Expand Down
6 changes: 3 additions & 3 deletions data-raw/SARS-CoV-2 Zurich.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ measurements_sparse <- data_zurich$measurements[,weekday := weekdays(data_zurich
ww_data_SARS_CoV_2_Zurich <- sewer_data(measurements = measurements_sparse, flows = data_zurich$flows, cases = data_zurich$cases)
usethis::use_data(ww_data_SARS_CoV_2_Zurich, overwrite = TRUE)

generation_dist <- get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12)
incubation_dist <- get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4, maxX = 10)
shedding_dist <- get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397, maxX = 30)
generation_dist <- get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4)
incubation_dist <- get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4)
shedding_dist <- get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397)
ww_assumptions_SARS_CoV_2_Zurich <- sewer_assumptions(
generation_dist = generation_dist,
incubation_dist = incubation_dist,
Expand Down
Binary file modified data/ww_assumptions_SARS_CoV_2_Zurich.rda
Binary file not shown.
Binary file modified data/ww_assumptions_influenza_Zurich.rda
Binary file not shown.
Binary file modified man/figures/README-R-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-concentration-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-concentration_normalized-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-growth_report-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-growth_report2-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-infections-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-infections_with_cases-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-load-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-prior_posterior_noise_cv-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 7 additions & 2 deletions man/get_discrete_gamma.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/get_discrete_gamma_shifted.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions man/get_discrete_lognormal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions vignettes/detailed-example.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ The `shedding` module describes how infected individuals shed pathogen particles
```{r}
ww_shedding <- model_shedding(
shedding_dist = shedding_dist_assume(
get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397, maxX = 30), shedding_reference = "symptom_onset"),
incubation_dist = incubation_dist_assume(get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4, maxX = 10)),
get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397), shedding_reference = "symptom_onset"),
incubation_dist = incubation_dist_assume(get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4)),
load_per_case = load_per_case_calibrate(cases = data_zurich$cases),
load_variation = load_variation_estimate()
)
Expand All @@ -173,7 +173,7 @@ The `infections` module describes the underlying process leading to infected ind

```{r}
ww_infections <- model_infections(
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12)),
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4)),
R = R_estimate_splines(),
seeding = seeding_estimate_rw(),
infection_noise = infection_noise_estimate()
Expand Down Expand Up @@ -212,7 +212,7 @@ We have now specified all six `EpiSewer` modules. While this seems like a lot of

```{r}
ww_infections <- model_infections(
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12))
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4))
)
```

Expand Down Expand Up @@ -324,14 +324,14 @@ ww_sewage <- model_sewage(
)
ww_shedding <- model_shedding(
shedding_dist = shedding_dist_assume(get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397, maxX = 30), shedding_reference = "symptom_onset"),
incubation_dist = incubation_dist_assume(get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4, maxX = 10)),
shedding_dist = shedding_dist_assume(get_discrete_gamma(gamma_shape = 0.929639, gamma_scale = 7.241397), shedding_reference = "symptom_onset"),
incubation_dist = incubation_dist_assume(get_discrete_gamma(gamma_shape = 8.5, gamma_scale = 0.4)),
load_per_case = load_per_case_calibrate(cases = data_zurich$cases),
load_variation = load_variation_estimate()
)
ww_infections <- model_infections(
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4, maxX = 12)),
generation_dist = generation_dist_assume(get_discrete_gamma_shifted(gamma_mean = 3, gamma_sd = 2.4)),
R = R_estimate_splines(),
seeding = seeding_estimate_rw(),
infection_noise = infection_noise_estimate()
Expand Down
Loading

0 comments on commit 7db58ab

Please sign in to comment.