Merge pull request #112 from egouldo/88-no-euc-standardisation

Fix #88 conditionally apply standardisation to datasets
egouldo · Aug 14, 2024 · 42b1e38 · 42b1e38
2 parents 5a586a0 + 7bd3ca8
commit 42b1e38
Show file tree

Hide file tree

Showing 108 changed files with 2,033 additions and 1,674 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ManyEcoEvo
 Title: Meta-analyse data from 'Many-Analysts' style studies
-Version: 2.3.0.9003
+Version: 2.4.1.9000
 Authors@R: c(
     person("Elliot", "Gould", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),

diff --git a/ManyEcoEvo.Rproj b/ManyEcoEvo.Rproj
@@ -15,3 +15,4 @@ LaTeX: pdfLaTeX
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace,vignette
diff --git a/NAMESPACE b/NAMESPACE
@@ -53,6 +53,7 @@ export(generate_rating_subsets)
 export(generate_yi_subsets)
 export(get_MA_fit_stats)
 export(get_diversity_data)
+export(get_forest_plot_data)
 export(gg_forest)
 export(identity_back)
 export(inverse_back)
@@ -65,6 +66,7 @@ export(meta_analyse_datasets)
 export(named_group_split)
 export(plot_cont_rating_effects)
 export(plot_effects_diversity)
+export(plot_forest)
 export(plot_model_means_box_cox_cat)
 export(plot_model_means_orchard)
 export(power_back)
@@ -105,9 +107,14 @@ export(summarise_variable_counts)
 export(validate_predictions)
 export(validate_predictions_df_blue_tit)
 export(validate_predictions_df_euc)
+import(NatParksPalettes)
+import(broom)
+import(broom.mixed)
 import(cli)
 import(dplyr)
+import(forcats)
 import(ggbeeswarm)
+import(ggforestplot)
 import(ggplot2)
 import(lme4)
 import(metafor)
@@ -118,29 +125,12 @@ import(see)
 import(stringr)
 import(tidyr)
 importFrom(EnvStats,stat_n_text)
-importFrom(broom,tidy)
 importFrom(broom.mixed,tidy)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_alert_info)
 importFrom(cli,cli_alert_warning)
 importFrom(cli,cli_h2)
 importFrom(data.table,setnames)
-importFrom(dplyr,across)
-importFrom(dplyr,count)
-importFrom(dplyr,distinct)
-importFrom(dplyr,ends_with)
-importFrom(dplyr,everything)
-importFrom(dplyr,filter)
-importFrom(dplyr,full_join)
-importFrom(dplyr,group_by)
-importFrom(dplyr,group_split)
-importFrom(dplyr,left_join)
-importFrom(dplyr,mutate)
-importFrom(dplyr,n_distinct)
-importFrom(dplyr,rename)
-importFrom(dplyr,right_join)
-importFrom(dplyr,select)
-importFrom(dplyr,summarise)
 importFrom(forcats,fct_relevel)
 importFrom(ggplot2,ggplot)
 importFrom(glue,glue)
@@ -150,17 +140,18 @@ importFrom(magrittr,"%>%")
 importFrom(metaviz,viz_funnel)
 importFrom(parameters,parameters)
 importFrom(performance,performance)
+importFrom(pointblank,col_exists)
 importFrom(pointblank,col_vals_not_null)
 importFrom(pointblank,has_columns)
 importFrom(pointblank,stop_if_not)
 importFrom(pointblank,test_col_vals_gte)
+importFrom(pointblank,vars)
 importFrom(purrr,keep)
 importFrom(purrr,list_flatten)
 importFrom(purrr,list_rbind)
 importFrom(purrr,map)
 importFrom(purrr,map2)
 importFrom(purrr,map_chr)
-importFrom(purrr,map_dfr)
 importFrom(purrr,map_if)
 importFrom(purrr,pluck)
 importFrom(purrr,pmap)

diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,5 @@
+# ManyEcoEvo (development version)
+
+# ManyEcoEvo 2.4.1
+
+* Initial CRAN submission.
diff --git a/R/back_transformations.R b/R/back_transformations.R
@@ -5,8 +5,8 @@
 #' 
 #' @details We assume analysts' estimates are normally distributed. Each function uses a normal distribution to simulate the a distribution of effect-sizes and their standard errors. Next this distribution is back-transformed to the desired response scale. The mean `m_est`, standard error `se_est`, and quantiles (`lower` and `upper`) of the back-transformed distribution are returned within a dataframe.
 #' @param beta Analyst beta estimate
-#' @param se Standard error of analyst's effect size estimate $\\beta$ 
-#' or out-of-sample prediction estimate $y\\_i$.
+#' @param se Standard error of analyst's effect size estimate \eqn{\beta} 
+#' or out-of-sample prediction estimate \eqn{y_i}.
 #' @param sim numeric vector of length 1. number of simulations.
 #' @return data frame containing the mean estimate, its standard error, and quantiles.
 #' @family back transformation
@@ -71,7 +71,7 @@ probit_back <- function(beta, se, sim) {
   return(set)
 }
 
-#' @describeIn back Back transform beta estimates for models with $1/x$ link
+#' @describeIn back Back transform beta estimates for models with \eqn{1/x} link
 #' @export
 inverse_back <- function(beta, se, sim) {
   simulated <- rnorm(sim, beta, se)
@@ -90,7 +90,7 @@ inverse_back <- function(beta, se, sim) {
   return(set)
 }
 
-#' @describeIn back Back transform beta estimates for models with $x^2$-link
+#' @describeIn back Back transform beta estimates for models with \eqn{x^2}-link
 #' @export
 square_back <- function(beta, se, sim) {
   simulated <- rnorm(sim, beta, se)
@@ -109,7 +109,7 @@ square_back <- function(beta, se, sim) {
   return(set)
 }
 
-#' @describeIn back Back transform beta estimates for models with $x^3$-link
+#' @describeIn back Back transform beta estimates for models with \eqn{x^3}-link
 #' @export
 cube_back <- function(beta, se, sim) {
   simulated <- rnorm(sim, beta, se)

diff --git a/R/convert_predictions.R b/R/convert_predictions.R
@@ -92,7 +92,7 @@ convert_predictions <- function(augmented_data,
         sim = 10000
       ) %>%
         t()
-    } else { # Back-transform response & link-function
+    } else { # Back-transform response AND link-function
 
       if (rlang::is_na(response_transformation) | rlang::is_na(link_fun)) {
         cli::cli_alert_warning("Missing Value for {.arg response_transformation}, returning {.val NA}")

diff --git a/R/get_MA_fit_stats.R b/R/get_MA_fit_stats.R
@@ -1,6 +1,6 @@
 #' Extract meta-analytic statistics like \eqn{I^2}, etc.
 #' @param MA_mod a fitted model of class rma.mv
-#' @return A tidy tibble with \eqn{\sigma^2} and \eqn{I^2} estimates for \code{MA_mod}
+#' @return A tidy tibble with \eqn{\sigma^2} and \eqn{I^2} estimates for `MA_mod`
 #' @export
 get_MA_fit_stats <- function(MA_mod) {
   stopifnot("MA_mod must be an object of class rma.mv" = "rma.mv" %in% class(MA_mod))

diff --git a/R/get_forest_plot_data.R b/R/get_forest_plot_data.R
@@ -0,0 +1,90 @@
+#' @title Get Forest Plot Data from a Metafor Model
+#'
+#' @param model A metafor model object of class `rma.mv` or `rma.uni`
+#' @return A tibble containing the data required to plot a forest plot
+#' @export
+#' @family Forest Plot Functions
+#' @examples
+#' get_forest_plot_data(model)
+#' @import broom.mixed
+#' @import dplyr
+#' @import forcats
+#' @import stringr
+get_forest_plot_data <- function(model){
+  model %>% 
+    broom.mixed::tidy(conf.int = TRUE, include_studies = TRUE) %>% 
+    dplyr::mutate(
+      point_shape = 
+        ifelse(stringr::str_detect(term, "overall"), 
+               "diamond", 
+               "circle"),
+      term = 
+        forcats::fct_reorder(term, 
+                             estimate) %>% 
+        forcats::fct_reorder(., 
+                             point_shape,
+                             .desc = TRUE),
+      parameter_type = case_when(str_detect(term, "overall") ~ "mean",
+                                 TRUE ~ "study"),
+      meta_analytic_mean =  pull(., estimate, type) %>% 
+        keep_at(at = "summary")) %>% 
+    select(-type, Parameter = term, everything())
+}
+
+#' @title Plot a Forest Plot
+#' @description Plot a forest plot using the data from `get_forest_plot_data`
+#' @param data A tibble containing the data required to plot a forest plot
+#' @param intercept Logical. Should a horizontal line be added at 0?
+#' @param MA_mean Logical. Should a dashed line be added at the meta-analytic mean?
+#' @return A ggplot object
+#' @export
+#' @import ggplot2
+#' @import ggforestplot
+#' @import NatParksPalettes
+#' @family Forest Plot Functions
+#' @examples
+#' model <- ManyEcoEvo_results %>% pluck("MA_mod", 1) 
+#' plot_data <- get_forest_plot_data(model)
+#' plot_forest(plot_data)
+#' plot_forest(plot_data, intercept = FALSE)
+#' plot_forest(plot_data, MA_mean = FALSE)
+#' plot_forest(plot_data, intercept = FALSE, MA_mean = FALSE)
+plot_forest <- function(data, intercept = TRUE, MA_mean = TRUE){
+  if (MA_mean == FALSE) {
+    data <- filter(data, Parameter != "overall")
+  }
+
+  p <- ggplot(data, aes(y = estimate, 
+                        x =  Parameter, 
+                        ymin = conf.low, 
+                        ymax = conf.high,
+                        shape = point_shape,
+                        colour = parameter_type)) +
+    geom_pointrange(fatten = 2) +
+    ggforestplot::theme_forest() +
+    theme(axis.line = element_line(linewidth = 0.10, colour = "black"),
+          axis.line.y = element_blank(),
+          text = element_text(family = "Helvetica")#,
+          # axis.text.y = element_blank()
+    ) +
+    guides(shape = guide_legend(title = NULL), 
+           colour = guide_legend(title = NULL)) +
+    coord_flip() +
+    ylab(bquote(Standardised~Effect~Size~Z[r])) +
+    xlab(element_blank()) +
+    # scale_y_continuous(breaks = c(-4,-3,-2,-1,0,1),
+    # minor_breaks = seq(from = -4.5, to = 1.5, by = 0.5)) +
+    NatParksPalettes::scale_color_natparks_d("Glacier")
+
+  if (intercept == TRUE) {
+    p <- p + geom_hline(yintercept = 0)
+  }
+  if (MA_mean == TRUE) {
+    p <- p + geom_hline(aes(yintercept = meta_analytic_mean), 
+                        data = data,
+                        colour = "#01353D", 
+                        linetype = "dashed")
+  }
+
+  return(p)
+}
diff --git a/R/prepare_response_variables.R b/R/prepare_response_variables.R
@@ -3,31 +3,105 @@
 #' @param ManyEcoEvo Complete ManyEcoEvo dataset containing nested datasets for each different analysis and exclusion set dataset
 #' @param estimate_type A character string of length 1, equal to either "Zr", "yi", "y25", "y50", "y75", indicating what type of estimates are being prepared.
 #' @param param_table A table of parameters \(mean, sd\) for *most* response variables used by analysts. This tibble is pulled from the named object exported by `ManyEcoEvo::`. but can be overwritten with the users's own `param_table` dataset.
-#'
+#' @param dataset_standardise A character string of length 1, equal to the name of the dataset to standardise the response variables to. If `NULL`, all datasets are standardised.
 #' @return A tibble of nested list-columns
 #' @details Operates on nested list-columns of dataframes, where each dataframe contains the response variable data for a single analysis. The function standardises the response variable data for each analysis, and returns the modified dataset to the `data` list-column.
 #' @family targets-pipeline functions.
 #' @family Multi-dataset Wrapper Functions
 #' @export
+#' @import cli
+#' @import dplyr
+#' @import purrr
+#' @import rlang
 prepare_response_variables <- function(ManyEcoEvo,
                                        estimate_type = character(1L),
-                                       param_table = NULL) {
-  stopifnot(is.data.frame(ManyEcoEvo))
-  # TODO run checks on ManyEcoEvo
+                                       param_table = NULL,
+                                       dataset_standardise = NULL) {
   match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)
-  out <- ManyEcoEvo %>%
-    ungroup() %>%
-    # dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
-    dplyr::mutate(data = purrr::map2(
-      .x = data, .y = dataset,
-      .f = ~ standardise_response(
-        dat = .x,
-        estimate_type = !!{
-          estimate_type
-        },
-        param_table,
-        dataset = .y
+  stopifnot(is.data.frame(ManyEcoEvo))
+  pointblank::expect_col_exists(object = ManyEcoEvo, columns = c(dataset, data))
+  if (!is.null(dataset_standardise)) {
+    stopifnot(is.character(dataset_standardise))
+    stopifnot(length(dataset_standardise) >= 1)
+    stopifnot(length(dataset_standardise) <= length(unique(ManyEcoEvo$dataset)))
+    match.arg(dataset_standardise, choices = ManyEcoEvo$dataset, several.ok = TRUE)
+  }
+
+  out <- ManyEcoEvo
+
+  if (estimate_type != "Zr") {
+    if (is.null(param_table)) {
+
+      cli::cli_abort("{.arg param_table} must be supplied for {.val {estimate_type}} data")
+    }
+
+    # ------ Back transform if estimate_type is yi only ------
+    out <- out %>%
+      ungroup() %>%
+      dplyr::mutate(
+        data = purrr::map2( #TODO assign to data or not?
+          .x = data, 
+          .y = dataset,
+          .f = ~ back_transform_response_vars_yi(
+            dat = .x,
+            estimate_type = !!{
+              estimate_type
+            },
+            dataset = .y
+          )
+        ),
+        diversity_data = 
+          map2(
+            .x = diversity_data,
+            .y = data,
+            .f = ~ semi_join(.x, .y, by = join_by(id_col)) %>% 
+              distinct()
+          )
       )
-    ))
+
+  } else {
+    if (!is.null(param_table)) {
+      cli::cli_abort("{.arg param_table} must be NULL for {.val {estimate_type}} data")
+    }
+  }
+
+  # ------ Standardise Response Variables for Meta-analysis ------
+
+  if (is.null(dataset_standardise)) {
+    out <- out %>%
+      ungroup() %>%
+      dplyr::mutate(data = purrr::map2(
+        .x = data, 
+        .y = dataset,
+        .f = ~ standardise_response(
+          dat = .x,
+          estimate_type = !!{
+            estimate_type
+          },
+          param_table,
+          dataset = .y
+        )
+      ))
+  } else {
+
+    datasets_to_standardise <- tibble(
+      dataset = dataset_standardise,
+      fns = list(standardise_response)
+    )
+
+    pmap_prepare_response <- function(data, estimate_type, param_table, dataset, fns, ...){
+      fns(data, estimate_type, param_table, dataset)
+    }
+
+    out <- out %>%
+      ungroup() %>%
+      left_join(datasets_to_standardise, by = "dataset") %>% 
+      mutate(fns = coalesce(fns, list(process_response))) %>%
+      mutate(data = pmap(.l = ., 
+                         .f = pmap_prepare_response, 
+                         estimate_type = estimate_type, 
+                         param_table = param_table)) %>% 
+      select(-fns) #TODO drop ci cols or not??
+  }
   return(out)
 }