Merge pull request #692 from epiforecasts/update-documentation

Issue #327 - Update package documentation
epiforecasts · Mar 8, 2024 · f3ec3ad · f3ec3ad
2 parents 8448015 + daf4d5a
commit f3ec3ad
Show file tree

Hide file tree

Showing 47 changed files with 222 additions and 200 deletions.
diff --git a/R/add_coverage.R b/R/add_coverage.R
@@ -1,9 +1,14 @@
 #' @title Get Quantile And Interval Coverage Values For Quantile-Based Forecasts
 #'
-#' @description Compute interval coverage of central prediction intervals,
-#' quantile coverage for predictive quantiles, as well as the deviation between
-#' desired and actual coverage to a data.table. Forecasts should be in a
-#' quantile format (following the input requirements of `score()`).
+#' @description For a validated forecast object in a quantile-based format
+#' (see [as_forecast()] for more information), this function computes
+#' - interval coverage of central prediction intervals
+#' - quantile coverage for predictive quantiles
+#' - the deviation between desired and actual coverage (both for interval and
+#' quantile coverage)
+#'
+#' Coverage values are computed for a specific level of grouping, as specified
+#' in the `by` argument. By default, coverage values are computed per model.
 #'
 #' **Interval coverage**
 #'
@@ -14,15 +19,6 @@
 #' central prediction interval is the interval between the 0.25 and 0.75
 #' quantiles of the predictive distribution.
 #'
-#' The function `get_coverage()` computes the coverage per central prediction
-#' interval. This means that if you set `by` to the unit of a single forecast,
-#' interval coverage will always be either `TRUE`
-#' (observed value falls within the interval) or `FALSE` (observed value falls
-#' outside the interval) and analogously for quantile coverage.
-#' Coverage values become meaningful by summarising them across different
-#' dimensions, as specified in the `by` argument (thereby returning the
-#' proportion of values covered by all prediction intervals/quantiles).
-#'
 #' **Quantile coverage**
 #'
 #' Quantile coverage for a given quantile is defined as the proportion of
@@ -38,7 +34,8 @@
 #' (can be either interval or quantile coverage) and the
 #' actual coverage. For example, if the desired coverage is 90% and the actual
 #' coverage is 80%, the coverage deviation is -0.1.
-#'
+#' @return A data.table with columns as specified in `by` and additional
+#' columns for the coverage values described above
 #' @inheritParams score
 #' @param by character vector that denotes the level of grouping for which the
 #' coverage values should be computed. By default (`"model"`), one coverage

diff --git a/R/available_forecasts.R b/R/available_forecasts.R
@@ -1,10 +1,10 @@
 #' @title Count Number of Available Forecasts
 #'
 #' @description
-#'
-#' Given a data set with forecasts, count the number of available forecasts
-#' for arbitrary grouping (e.g. the number of forecasts per model, or the
-#' number of forecasts per model and location).
+#' Given a data set with forecasts, this function counts the number of available forecasts.
+#' The level of grouping can be specified using the `by` argument (e.g. to
+#' count the number of forecasts per model, or the number of forecasts per
+#' model and location).
 #' This is useful to determine whether there are any missing forecasts.
 #'
 #' @param by character vector or `NULL` (the default) that denotes the

diff --git a/R/convenience-functions.R b/R/convenience-functions.R
@@ -44,11 +44,11 @@
 #' @param ... Additional parameters to pass to the function you supplied. For
 #' the default option of [log_shift()] this could be the `offset` argument.
 #'
-#' @return A `data.table` with either a transformed version of the data, or one
-#' with both the untransformed and the transformed data. includes the original
-#' data as well as a transformation of the original data. There will be one
-#' additional column, `scale', present which will be set to "natural" for the
-#' untransformed forecasts.
+#' @return A forecast object with either a transformed version of the data, or
+#' one with both the untransformed and the transformed data, i.e. one that
+#' includes the original data as well as a transformation of it. There will
+#' be one additional column, `scale`, present which will be set to "natural"
+#' for the untransformed forecasts.
 #'
 #' @importFrom data.table ':=' is.data.table copy
 #' @importFrom cli cli_abort cli_warn

diff --git a/R/get_-functions.R b/R/get_-functions.R
@@ -190,7 +190,7 @@ get_score_names <- function(scores, error = FALSE) {
 }
 
 
-#' @title Get unit of a single forecast
+#' @title Get Unit Of A Single Forecast
 #' @description Helper function to get the unit of a single forecast, i.e.
 #' the column names that define where a single forecast was made for.
 #' This just takes all columns that are available in the data and subtracts
@@ -210,7 +210,7 @@ get_forecast_unit <- function(data) {
 }
 
 
-#' @title Get protected columns from a data frame
+#' @title Get Protected Columns From Data
 #'
 #' @description Helper function to get the names of all columns in a data frame
 #' that are protected columns.
@@ -249,7 +249,7 @@ get_protected_columns <- function(data = NULL) {
 }
 
 
-#' @title Find duplicate forecasts
+#' @title Find Duplicate Forecasts
 #'
 #' @description Helper function to identify duplicate forecasts, i.e.
 #' instances where there is more than one forecast for the same prediction
@@ -263,6 +263,7 @@ get_protected_columns <- function(data = NULL) {
 #'
 #' @return A data.frame with all rows for which a duplicate forecast was found
 #' @export
+#' @importFrom checkmate assert_data_frame assert_subset
 #' @keywords check-forecasts
 #' @examples
 #' example <- rbind(example_quantile, example_quantile[1000:1010])

diff --git a/R/metrics-sample.R b/R/metrics-sample.R
@@ -1,4 +1,4 @@
-#' @title Determines bias of forecasts
+#' @title Determine Bias Of Forecasts
 #'
 #' @description
 #' Determines bias from predictive Monte-Carlo samples. The function
@@ -76,7 +76,7 @@ bias_sample <- function(observed, predicted) {
 }
 
 
-#' @title Absolute Error of the Median (Sample-based Version)
+#' @title Absolute Error Of The Median (Sample-Based Version)
 #'
 #' @description
 #' Absolute error of the median calculated as
@@ -111,7 +111,7 @@ ae_median_sample <- function(observed, predicted) {
 }
 
 
-#' @title Squared Error of the Mean (Sample-based Version)
+#' @title Squared Error Of The Mean (Sample-Based Version)
 #'
 #' @description
 #' Squared error of the mean calculated as
@@ -143,7 +143,7 @@ se_mean_sample <- function(observed, predicted) {
 }
 
 
-#' @title Logarithmic score
+#' @title Logarithmic Score
 #'
 #' @description
 #' Wrapper around the [`logs_sample()`][scoringRules::scores_sample_univ]
@@ -243,7 +243,7 @@ crps_sample <- function(observed, predicted, ...) {
 }
 
 
-#' @title Determine dispersion of a probabilistic forecast
+#' @title Determine Dispersion Of A Probabilistic Forecast
 #' @details
 #' Sharpness is the ability of the model to generate predictions within a
 #' narrow range and dispersion is the lack thereof.

diff --git a/R/pairwise-comparisons.R b/R/pairwise-comparisons.R
@@ -26,22 +26,25 @@
 #' `permutationTest` from the `surveillance` package by Michael Höhle,
 #' Andrea Riebler and Michaela Paul.
 #'
-#' @param scores A data.table of scores as produced by [score()].
-#' @param metric A character vector of length one with the metric to do the
-#' comparison on.
-#' @param by character vector with names of columns present in the input
-#' data.frame. `by` determines how pairwise comparisons will be computed.
-#' You will get a relative skill score for every grouping level determined in
-#' `by`. If, for example, `by = c("model", "location")`. Then you will get a
+#' @param by character vector with column names that define the grouping level
+#' for the pairwise comparisons. By default (`model`), there will be one
+#' relative skill score per model. If, for example,
+#' `by = c("model", "location")`, then you will get a
 #' separate relative skill score for every model in every location. Internally,
-#' the data.frame will be split according `by` (but removing "model" before
-#' splitting) and the pairwise comparisons will be computed separately for the
-#' split data.frames.
-#' @param baseline character vector of length one that denotes the baseline
-#' model against which to compare other models.
+#' the data.table with scores will be split according to `by` (removing "model"
+#' before splitting) and the pairwise comparisons will be computed separately
+#' for the split data.tables.
+#' @param metric A string with the name of the metric for which
+#' a relative skill shall be computed. By default this is either "crps",
+#' "wis" or "brier_score" if any of these are available.
+#' @param baseline A string with the name of a model. If a baseline is
+#' given, then a scaled relative skill with respect to the baseline will be
+#' returned. By default (`NULL`), relative skill will not be scaled with
+#' respect to a baseline model.
 #' @param ... additional arguments for the comparison between two models. See
 #' [compare_two_models()] for more information.
-#' @return A ggplot2 object with a coloured table of summarised scores
+#' @inheritParams summarise_scores
+#' @return A data.table with pairwise comparisons
 #' @importFrom data.table as.data.table data.table setnames copy
 #' @importFrom stats sd rbinom wilcox.test p.adjust
 #' @importFrom utils combn
@@ -209,7 +212,7 @@ pairwise_comparison <- function(
 #' subgroup is managed from [pairwise_comparison_one_group()]. In order to
 #' actually do the comparison between two models over a subset of common
 #' forecasts it calls [compare_two_models()].
-#' @inheritParams pairwise_comparison
+#' @inherit pairwise_comparison params return
 #' @importFrom cli cli_abort
 #' @keywords internal
 
@@ -357,6 +360,8 @@ pairwise_comparison_one_group <- function(scores,
 #' determine p-values.
 #' @param n_permutations numeric, the number of permutations for a
 #' permutation test. Default is 999.
+#' @return A list with mean score ratios and p-values for the comparison
+#' between two models
 #' @importFrom cli cli_abort
 #' @author Johannes Bracher, \email{johannes.bracher@@kit.edu}
 #' @author Nikos Bosse \email{nikosbosse@@gmail.com}

diff --git a/R/plot.R b/R/plot.R
@@ -14,7 +14,7 @@
 #' @param metrics A character vector with the metrics to show. If set to
 #' `NULL` (default), all metrics present in `scores` will be shown.
 #'
-#' @return A ggplot2 object with a coloured table of summarised scores
+#' @return A ggplot object with a coloured table of summarised scores
 #' @inheritParams pairwise_comparison
 #' @importFrom ggplot2 ggplot aes element_blank element_text labs coord_cartesian coord_flip
 #' @importFrom data.table setDT melt
@@ -140,11 +140,12 @@ plot_score_table <- function(scores,
 #' @param relative_contributions show relative contributions instead of absolute
 #' contributions. Default is FALSE and this functionality is not available yet.
 #' @param flip boolean (default is `FALSE`), whether or not to flip the axes.
-#' @return A ggplot2 object showing a contributions from the three components of
+#' @return A ggplot object showing the contributions from the three components of
 #' the weighted interval score
 #' @importFrom ggplot2 ggplot aes geom_linerange facet_wrap labs
 #' scale_fill_discrete
 #' theme theme_light unit guides guide_legend .data
+#' @return A ggplot object with a visualisation of the WIS decomposition
 #' @export
 #' @examples
 #' library(ggplot2)
@@ -226,7 +227,7 @@ plot_wis <- function(scores,
 #' could be something like "horizon", or "location"
 #' @param metric the metric that determines the value and colour shown in the
 #' tiles of the heatmap
-#' @return A ggplot2 object showing a heatmap of the desired metric
+#' @return A ggplot object showing a heatmap of the desired metric
 #' @importFrom data.table setDT `:=`
 #' @importFrom ggplot2 ggplot  aes geom_tile geom_text .data
 #' scale_fill_gradient2 labs element_text coord_cartesian
@@ -410,6 +411,8 @@ plot_quantile_coverage <- function(coverage,
 #' @importFrom stats reorder
 #' @importFrom ggplot2 labs coord_cartesian facet_wrap facet_grid theme
 #' element_text element_blank
+#' @return A ggplot object with a heatmap of mean score ratios from pairwise
+#' comparisons
 #' @export
 #' @examples
 #' library(ggplot2)
@@ -541,7 +544,7 @@ plot_pairwise_comparison <- function(comparison_result,
 #' @importFrom stats as.formula
 #' @importFrom ggplot2 geom_col
 #' @importFrom stats density
-#' @return vector with the scoring values
+#' @return A ggplot object with a histogram of PIT values
 #' @examples
 #' \dontshow{
 #'   data.table::setDTthreads(2) # restricts number of cores used on CRAN
@@ -662,7 +665,7 @@ plot_pit <- function(pit,
 #' are shown on the x-axis.
 #' @param show_counts logical (default is `TRUE`) that indicates whether
 #' or not to show the actual count numbers on the plot
-#' @return ggplot object with a plot of interval coverage
+#' @return A ggplot object with a plot of forecast counts
 #' @importFrom ggplot2 ggplot scale_colour_manual scale_fill_manual
 #' geom_tile scale_fill_gradient .data
 #' @importFrom data.table dcast .I .N
@@ -731,12 +734,13 @@ plot_forecast_counts <- function(forecast_counts,
 #'
 #' @param correlations A data.table of correlations between scores as produced
 #' by [correlation()].
-#' @return A ggplot2 object showing a coloured matrix of correlations
+#' @return A ggplot object showing a coloured matrix of correlations
 #' between metrics
 #' @importFrom ggplot2 ggplot geom_tile geom_text aes scale_fill_gradient2
 #' element_text labs coord_cartesian theme element_blank
 #' @importFrom data.table setDT melt
 #' @export
+#' @return A ggplot object with a visualisation of correlations between metrics
 #' @examples
 #' scores <- score(as_forecast(example_quantile))
 #' correlations <- correlation(

diff --git a/R/score.R b/R/score.R
@@ -22,7 +22,9 @@
 #' @param ... additional arguments
 #' @return An object of class `scores`. This object is a data.table with
 #' unsummarised scores (one score per forecast) and has an additional attribute
-#' `score_names` with the names of the metrics used for scoring.
+#' `score_names` with the names of the metrics used for scoring. See
+#' [summarise_scores()] for information on how to summarise
+#' scores.
 #' @importFrom data.table ':=' as.data.table
 #' @importFrom stats na.omit
 #' @examples
@@ -232,6 +234,7 @@ apply_rules <- function(data, metrics, ...) {
 #' @param score_names A character vector with the names of the scores
 #' (i.e. the names of the scoring rules used for scoring)
 #' @keywords internal
+#' @return An object of class `scores`
 #' @examples
 #' \dontrun{
 #' df <- data.frame(
@@ -251,8 +254,7 @@ new_scores <- function(scores, score_names) {
 #' Create An Object Of Class `scores` From Data
 #' @description This convenience function wraps [new_scores()] and validates
 #' the `scores` object.
-#' @inheritParams new_scores
-#' @returns Returns an object of class 1scores`
+#' @inherit new_scores params return
 #' @importFrom checkmate assert_data_frame
 #' @keywords internal
 as_scores <- function(scores, score_names) {

diff --git a/R/summarise_scores.R b/R/summarise_scores.R
@@ -10,21 +10,15 @@
 #' do, you also have to manually update the attribute by calling
 #' `attr(scores, "score_names") <- new_names`.
 #'
-#' @inheritParams pairwise_comparison
-#' @inheritParams score
+#' @param scores An object of class `scores` (a data.table with
+#' scores and an additional attribute `score_names` as produced by [score()])
 #' @param by character vector with column names to summarise scores by. Default
 #' is `model`, meaning that there will be one score per model in the output.
-#' The *unit of a single forecast* is determined by the columns present in the
-#' input data that do not correspond to a metric produced by [score()], which
-#' indicate indicate a grouping of forecasts (for example there may be one
-#' forecast per day, location and model). Adding additional, unrelated, columns
-#' may alter results in an unpredictable way.
-#' @param across character vector with column names from the vector of variables
-#' that define the *unit of a single forecast* (see above) to summarise scores
+#' @param across character vector with column names to summarise scores
 #' across (meaning that the specified columns will be dropped). This is an
 #' alternative to specifying `by` directly. If `across` is set, `by` will be
 #' ignored. If `across` is `NULL` (default), then `by` will be used.
-#' @param fun a function used for summarising scores. Default is `mean`.
+#' @param fun a function used for summarising scores. Default is [mean()].
 #' @param ... additional parameters that can be passed to the summary function
 #' provided to `fun`. For more information see the documentation of the
 #' respective function.
@@ -115,24 +109,11 @@ summarize_scores <- summarise_scores
 #' @title Add pairwise comparisons
 #' @description Adds a column with relative skills computed by running
 #' pairwise comparisons on the scores.
-#'
-#' a column called
-#' 'model' must be present in the input data. For more information on
+#' For more information on
 #' the computation of relative skill, see [pairwise_comparison()].
 #' Relative skill will be calculated for the aggregation level specified in
 #' `by`.
-#' WRITE MORE INFO HERE.
-#'
-#'
-#' @param scores MORE INFO HERE.
-#' @param by character vector with column names to summarise scores by. Default
-#' is "model", meaning that there will be one relative skill score per model.
-#' @param metric character with the name of the metric for which
-#' a relative skill shall be computed.
-#' @param baseline character string with the name of a model. If a baseline is
-#' given, then a scaled relative skill with respect to the baseline will be
-#' returned. By default (`NULL`), relative skill will not be scaled with
-#' respect to a baseline model.
+#' @inheritParams pairwise_comparison
 #' @export
 #' @keywords keyword scoring
 add_pairwise_comparison <- function(

diff --git a/R/utils.R b/R/utils.R
@@ -72,7 +72,7 @@ run_safely <- function(..., fun) {
 #' If the object is not a data table, it is converted to one. If the object
 #' is a data table, a copy of the object is returned.
 #' @param data An object to ensure is a data table
-#' @return A data table
+#' @return A data.table/a copy of an existing data.table
 #' @keywords internal
 #' @importFrom data.table copy is.data.table as.data.table
 ensure_data.table <- function(data) {
@@ -92,8 +92,8 @@ ensure_data.table <- function(data) {
 #' @param x An object of class 'forecast_*' object as produced by
 #' `as_forecast()`
 #' @param ... additional arguments for [print()]
+#' @return Returns `x` invisibly
 #' @importFrom cli cli_inform cli_warn col_blue cli_text
-#' @return NULL
 #' @export
 #' @keywords check-forecasts
 #' @examples