Skip to content

Commit

Permalink
Merge pull request #112 from egouldo/88-no-euc-standardisation
Browse files Browse the repository at this point in the history
Fix #88 conditionally apply standardisation to datasets
  • Loading branch information
egouldo authored Aug 14, 2024
2 parents 5a586a0 + 7bd3ca8 commit 42b1e38
Show file tree
Hide file tree
Showing 108 changed files with 2,033 additions and 1,674 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 2.3.0.9003
Version: 2.4.1.9000
Authors@R: c(
person("Elliot", "Gould", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),
Expand Down
1 change: 1 addition & 0 deletions ManyEcoEvo.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace,vignette
27 changes: 9 additions & 18 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export(generate_rating_subsets)
export(generate_yi_subsets)
export(get_MA_fit_stats)
export(get_diversity_data)
export(get_forest_plot_data)
export(gg_forest)
export(identity_back)
export(inverse_back)
Expand All @@ -65,6 +66,7 @@ export(meta_analyse_datasets)
export(named_group_split)
export(plot_cont_rating_effects)
export(plot_effects_diversity)
export(plot_forest)
export(plot_model_means_box_cox_cat)
export(plot_model_means_orchard)
export(power_back)
Expand Down Expand Up @@ -105,9 +107,14 @@ export(summarise_variable_counts)
export(validate_predictions)
export(validate_predictions_df_blue_tit)
export(validate_predictions_df_euc)
import(NatParksPalettes)
import(broom)
import(broom.mixed)
import(cli)
import(dplyr)
import(forcats)
import(ggbeeswarm)
import(ggforestplot)
import(ggplot2)
import(lme4)
import(metafor)
Expand All @@ -118,29 +125,12 @@ import(see)
import(stringr)
import(tidyr)
importFrom(EnvStats,stat_n_text)
importFrom(broom,tidy)
importFrom(broom.mixed,tidy)
importFrom(cli,cli_abort)
importFrom(cli,cli_alert_info)
importFrom(cli,cli_alert_warning)
importFrom(cli,cli_h2)
importFrom(data.table,setnames)
importFrom(dplyr,across)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,ends_with)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_split)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_distinct)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(forcats,fct_relevel)
importFrom(ggplot2,ggplot)
importFrom(glue,glue)
Expand All @@ -150,17 +140,18 @@ importFrom(magrittr,"%>%")
importFrom(metaviz,viz_funnel)
importFrom(parameters,parameters)
importFrom(performance,performance)
importFrom(pointblank,col_exists)
importFrom(pointblank,col_vals_not_null)
importFrom(pointblank,has_columns)
importFrom(pointblank,stop_if_not)
importFrom(pointblank,test_col_vals_gte)
importFrom(pointblank,vars)
importFrom(purrr,keep)
importFrom(purrr,list_flatten)
importFrom(purrr,list_rbind)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
importFrom(purrr,map_if)
importFrom(purrr,pluck)
importFrom(purrr,pmap)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# ManyEcoEvo (development version)

# ManyEcoEvo 2.4.1

* Initial CRAN submission.
10 changes: 5 additions & 5 deletions R/back_transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#'
#' @details We assume analysts' estimates are normally distributed. Each function uses a normal distribution to simulate the a distribution of effect-sizes and their standard errors. Next this distribution is back-transformed to the desired response scale. The mean `m_est`, standard error `se_est`, and quantiles (`lower` and `upper`) of the back-transformed distribution are returned within a dataframe.
#' @param beta Analyst beta estimate
#' @param se Standard error of analyst's effect size estimate $\\beta$
#' or out-of-sample prediction estimate $y\\_i$.
#' @param se Standard error of analyst's effect size estimate \eqn{\beta}
#' or out-of-sample prediction estimate \eqn{y_i}.
#' @param sim numeric vector of length 1. number of simulations.
#' @return data frame containing the mean estimate, its standard error, and quantiles.
#' @family back transformation
Expand Down Expand Up @@ -71,7 +71,7 @@ probit_back <- function(beta, se, sim) {
return(set)
}

#' @describeIn back Back transform beta estimates for models with $1/x$ link
#' @describeIn back Back transform beta estimates for models with \eqn{1/x} link
#' @export
inverse_back <- function(beta, se, sim) {
simulated <- rnorm(sim, beta, se)
Expand All @@ -90,7 +90,7 @@ inverse_back <- function(beta, se, sim) {
return(set)
}

#' @describeIn back Back transform beta estimates for models with $x^2$-link
#' @describeIn back Back transform beta estimates for models with \eqn{x^2}-link
#' @export
square_back <- function(beta, se, sim) {
simulated <- rnorm(sim, beta, se)
Expand All @@ -109,7 +109,7 @@ square_back <- function(beta, se, sim) {
return(set)
}

#' @describeIn back Back transform beta estimates for models with $x^3$-link
#' @describeIn back Back transform beta estimates for models with \eqn{x^3}-link
#' @export
cube_back <- function(beta, se, sim) {
simulated <- rnorm(sim, beta, se)
Expand Down
2 changes: 1 addition & 1 deletion R/convert_predictions.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ convert_predictions <- function(augmented_data,
sim = 10000
) %>%
t()
} else { # Back-transform response & link-function
} else { # Back-transform response AND link-function

if (rlang::is_na(response_transformation) | rlang::is_na(link_fun)) {
cli::cli_alert_warning("Missing Value for {.arg response_transformation}, returning {.val NA}")
Expand Down
2 changes: 1 addition & 1 deletion R/get_MA_fit_stats.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Extract meta-analytic statistics like \eqn{I^2}, etc.
#' @param MA_mod a fitted model of class rma.mv
#' @return A tidy tibble with \eqn{\sigma^2} and \eqn{I^2} estimates for \code{MA_mod}
#' @return A tidy tibble with \eqn{\sigma^2} and \eqn{I^2} estimates for `MA_mod`
#' @export
get_MA_fit_stats <- function(MA_mod) {
stopifnot("MA_mod must be an object of class rma.mv" = "rma.mv" %in% class(MA_mod))
Expand Down
90 changes: 90 additions & 0 deletions R/get_forest_plot_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#' @title Get Forest Plot Data from a Metafor Model
#'
#' @param model A metafor model object of class `rma.mv` or `rma.uni`
#' @return A tibble containing the data required to plot a forest plot
#' @export
#' @family Forest Plot Functions
#' @examples
#' get_forest_plot_data(model)
#' @import broom.mixed
#' @import dplyr
#' @import forcats
#' @import stringr
get_forest_plot_data <- function(model){
model %>%
broom.mixed::tidy(conf.int = TRUE, include_studies = TRUE) %>%
dplyr::mutate(
point_shape =
ifelse(stringr::str_detect(term, "overall"),
"diamond",
"circle"),
term =
forcats::fct_reorder(term,
estimate) %>%
forcats::fct_reorder(.,
point_shape,
.desc = TRUE),
parameter_type = case_when(str_detect(term, "overall") ~ "mean",
TRUE ~ "study"),
meta_analytic_mean = pull(., estimate, type) %>%
keep_at(at = "summary")) %>%
select(-type, Parameter = term, everything())
}

#' @title Plot a Forest Plot
#' @description Plot a forest plot using the data from `get_forest_plot_data`
#' @param data A tibble containing the data required to plot a forest plot
#' @param intercept Logical. Should a horizontal line be added at 0?
#' @param MA_mean Logical. Should a dashed line be added at the meta-analytic mean?
#' @return A ggplot object
#' @export
#' @import ggplot2
#' @import ggforestplot
#' @import NatParksPalettes
#' @family Forest Plot Functions
#' @examples
#' model <- ManyEcoEvo_results %>% pluck("MA_mod", 1)
#' plot_data <- get_forest_plot_data(model)
#' plot_forest(plot_data)
#' plot_forest(plot_data, intercept = FALSE)
#' plot_forest(plot_data, MA_mean = FALSE)
#' plot_forest(plot_data, intercept = FALSE, MA_mean = FALSE)
plot_forest <- function(data, intercept = TRUE, MA_mean = TRUE){
if (MA_mean == FALSE) {
data <- filter(data, Parameter != "overall")
}

p <- ggplot(data, aes(y = estimate,
x = Parameter,
ymin = conf.low,
ymax = conf.high,
shape = point_shape,
colour = parameter_type)) +
geom_pointrange(fatten = 2) +
ggforestplot::theme_forest() +
theme(axis.line = element_line(linewidth = 0.10, colour = "black"),
axis.line.y = element_blank(),
text = element_text(family = "Helvetica")#,
# axis.text.y = element_blank()
) +
guides(shape = guide_legend(title = NULL),
colour = guide_legend(title = NULL)) +
coord_flip() +
ylab(bquote(Standardised~Effect~Size~Z[r])) +
xlab(element_blank()) +
# scale_y_continuous(breaks = c(-4,-3,-2,-1,0,1),
# minor_breaks = seq(from = -4.5, to = 1.5, by = 0.5)) +
NatParksPalettes::scale_color_natparks_d("Glacier")

if (intercept == TRUE) {
p <- p + geom_hline(yintercept = 0)
}
if (MA_mean == TRUE) {
p <- p + geom_hline(aes(yintercept = meta_analytic_mean),
data = data,
colour = "#01353D",
linetype = "dashed")
}

return(p)
}
108 changes: 91 additions & 17 deletions R/prepare_response_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,105 @@
#' @param ManyEcoEvo Complete ManyEcoEvo dataset containing nested datasets for each different analysis and exclusion set dataset
#' @param estimate_type A character string of length 1, equal to either "Zr", "yi", "y25", "y50", "y75", indicating what type of estimates are being prepared.
#' @param param_table A table of parameters \(mean, sd\) for *most* response variables used by analysts. This tibble is pulled from the named object exported by `ManyEcoEvo::`. but can be overwritten with the users's own `param_table` dataset.
#'
#' @param dataset_standardise A character string of length 1, equal to the name of the dataset to standardise the response variables to. If `NULL`, all datasets are standardised.
#' @return A tibble of nested list-columns
#' @details Operates on nested list-columns of dataframes, where each dataframe contains the response variable data for a single analysis. The function standardises the response variable data for each analysis, and returns the modified dataset to the `data` list-column.
#' @family targets-pipeline functions.
#' @family Multi-dataset Wrapper Functions
#' @export
#' @import cli
#' @import dplyr
#' @import purrr
#' @import rlang
prepare_response_variables <- function(ManyEcoEvo,
estimate_type = character(1L),
param_table = NULL) {
stopifnot(is.data.frame(ManyEcoEvo))
# TODO run checks on ManyEcoEvo
param_table = NULL,
dataset_standardise = NULL) {
match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE)
out <- ManyEcoEvo %>%
ungroup() %>%
# dplyr::group_by(dataset) %>% #NOTE: mapping doesn't work properly when tibble is rowwise!
dplyr::mutate(data = purrr::map2(
.x = data, .y = dataset,
.f = ~ standardise_response(
dat = .x,
estimate_type = !!{
estimate_type
},
param_table,
dataset = .y
stopifnot(is.data.frame(ManyEcoEvo))
pointblank::expect_col_exists(object = ManyEcoEvo, columns = c(dataset, data))
if (!is.null(dataset_standardise)) {
stopifnot(is.character(dataset_standardise))
stopifnot(length(dataset_standardise) >= 1)
stopifnot(length(dataset_standardise) <= length(unique(ManyEcoEvo$dataset)))
match.arg(dataset_standardise, choices = ManyEcoEvo$dataset, several.ok = TRUE)
}

out <- ManyEcoEvo

if (estimate_type != "Zr") {
if (is.null(param_table)) {

cli::cli_abort("{.arg param_table} must be supplied for {.val {estimate_type}} data")
}

# ------ Back transform if estimate_type is yi only ------
out <- out %>%
ungroup() %>%
dplyr::mutate(
data = purrr::map2( #TODO assign to data or not?
.x = data,
.y = dataset,
.f = ~ back_transform_response_vars_yi(
dat = .x,
estimate_type = !!{
estimate_type
},
dataset = .y
)
),
diversity_data =
map2(
.x = diversity_data,
.y = data,
.f = ~ semi_join(.x, .y, by = join_by(id_col)) %>%
distinct()
)
)
))

} else {
if (!is.null(param_table)) {
cli::cli_abort("{.arg param_table} must be NULL for {.val {estimate_type}} data")
}
}

# ------ Standardise Response Variables for Meta-analysis ------

if (is.null(dataset_standardise)) {
out <- out %>%
ungroup() %>%
dplyr::mutate(data = purrr::map2(
.x = data,
.y = dataset,
.f = ~ standardise_response(
dat = .x,
estimate_type = !!{
estimate_type
},
param_table,
dataset = .y
)
))
} else {

datasets_to_standardise <- tibble(
dataset = dataset_standardise,
fns = list(standardise_response)
)

pmap_prepare_response <- function(data, estimate_type, param_table, dataset, fns, ...){
fns(data, estimate_type, param_table, dataset)
}

out <- out %>%
ungroup() %>%
left_join(datasets_to_standardise, by = "dataset") %>%
mutate(fns = coalesce(fns, list(process_response))) %>%
mutate(data = pmap(.l = .,
.f = pmap_prepare_response,
estimate_type = estimate_type,
param_table = param_table)) %>%
select(-fns) #TODO drop ci cols or not??
}
return(out)
}
Loading

0 comments on commit 42b1e38

Please sign in to comment.