diff --git a/NAMESPACE b/NAMESPACE index cdc4ad221..cf19c6db3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,10 @@ export(logs_binary) export(logs_sample) export(mad_sample) export(merge_pred_and_obs) +export(metrics_binary) +export(metrics_point) +export(metrics_quantile) +export(metrics_sample) export(new_forecast) export(overprediction) export(pairwise_comparison) @@ -64,15 +68,11 @@ export(plot_score_table) export(plot_wis) export(quantile_score) export(quantile_to_interval) -export(rules_binary) -export(rules_point) -export(rules_quantile) -export(rules_sample) export(run_safely) export(sample_to_quantile) export(score) export(se_mean_sample) -export(select_rules) +export(select_metrics) export(set_forecast_unit) export(summarise_scores) export(summarize_scores) diff --git a/NEWS.md b/NEWS.md index fa56b1010..4f1e33cf7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,21 +8,21 @@ The update introduces breaking changes. If you want to keep using the older vers - In `score()`, required columns "true_value" and "prediction" were renamed and replaced by required columns "observed" and "predicted" and "model" (which now is a required column). Scoring functions now also use the function arguments "observed" and "predicted" everywhere consistently. - The overall scoring workflow was updated. Most functions now operate on forecast objects, which can be created using the function `as_forecast()`. This function replaces the previous `check_forecast()` function and validates the inputs. `as_forecast()` also allows users to rename required columns and specify the forecast unit in a single step, taking over the functionality of `set_forecast_unit()` in most cases. `score()` is now a generic function that dispatches the correct method based on the forecast type. Forecast types currently supported are "binary", "point", "sample" and "quantile" with corresponding classes "forecast_binary", "forecast_point", "forecast_sample" and "forecast_quantile". 
- `set_forecast_unit()` now errors if any of the values in `forecast_unit` are not columns of the data. -- Scoring rules (functions used for scoring) received a consistent interface and input checks: - - Scoring rules for binary forecasts: +- All scoring functions exported by the package received a consistent interface and input checks: + - Metrics and scoring rules for binary forecasts: - `observed`: factor with exactly 2 levels - `predicted`: numeric, vector with probabilities - - Scoring rules for point forecasts: + - Metrics and scoring rules for point forecasts: - `observed`: numeric vector - `predicted`: numeric vector - - Scoring rules for sample-based forecasts: + - Metrics and scoring rules for sample-based forecasts: - `observed`: numeric, either a scalar or a vector - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - - Scoring rules for quantile-based forecasts: + - Metrics and scoring rules for quantile-based forecasts: - `observed`: numeric, either a scalar or a vector - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - `quantile_level`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`. -- Users can now supply their own scoring rules to `score()` as a list of functions. Default scoring rules can be accessed using the functions `rules_point()`, `rules_sample()`, `rules_quantile()` and `rules_binary()`, which return a named list of scoring rules suitable for the respective forecast type. Column names of scores in the output of `score()` correspond to the names of the scoring rules (i.e. the names of the functions in the list of scoring rules). +- Users can now supply their own metrics and scoring rules to `score()` as a list of functions. 
Default scoring rules can be accessed using the functions `metrics_point()`, `metrics_sample()`, `metrics_quantile()` and `metrics_binary()`, which return a named list of scoring rules suitable for the respective forecast type. Column names of scores in the output of `score()` correspond to the names of the scoring rules (i.e. the names of the functions in the list of metrics). - `score()` now returns objects of class `scores` with a stored attribute `metrics` that holds the names of the scoring rules that were used. Users can call `get_metrics()` to access the names of those scoring rules. - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`. - Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use `scoringutils:::is_forecast.forecast_quantile()`. diff --git a/R/default-scoring-rules.R b/R/default-scoring-rules.R index 97b7262f1..66f235766 100644 --- a/R/default-scoring-rules.R +++ b/R/default-scoring-rules.R @@ -1,7 +1,7 @@ -#' @title Select Scoring Rules From A List of Possible Scoring Rules +#' @title Select Metrics From A List of Functions #' @description Helper function to return only the scoring rules selected by -#' the user from a list of possible scoring rules. -#' @param rules A list of scoring rules. +#' the user from a list of possible functions.
+#' @param metrics A list of scoring functions. #' @param select A character vector of scoring rules to select from the list. #' If `select` is `NULL` (the default), all possible scoring rules are returned. #' @param exclude A character vector of scoring rules to exclude from the list. @@ -11,57 +11,57 @@ #' @importFrom checkmate assert_subset assert_list #' @export #' @examples -#' select_rules( -#' rules = rules_binary(), +#' select_metrics( +#' metrics = metrics_binary(), #' select = "brier_score" #' ) -#' select_rules( -#' rules = rules_binary(), +#' select_metrics( +#' metrics = metrics_binary(), #' exclude = "log_score" #' ) -select_rules <- function(rules, select = NULL, exclude = NULL) { +select_metrics <- function(metrics, select = NULL, exclude = NULL) { assert_character(x = c(select, exclude), null.ok = TRUE) - assert_list(rules, names = "named") - allowed <- names(rules) + assert_list(metrics, names = "named") + allowed <- names(metrics) if (is.null(select) && is.null(exclude)) { - return(rules) + return(metrics) } else if (is.null(select)) { assert_subset(exclude, allowed) select <- allowed[!allowed %in% exclude] - return(rules[select]) + return(metrics[select]) } else { assert_subset(select, allowed) - return(rules[select]) + return(metrics[select]) } } -#' @title Scoring Rules for Binary Forecasts +#' @title Default Metrics And Scoring Rules for Binary Forecasts #' @description Helper function that returns a named list of default #' scoring rules suitable for binary forecasts. 
#' #' The default scoring rules are: #' - "brier_score" = [brier_score()] #' - "log_score" = [logs_binary()] -#' @inherit select_rules params return +#' @inherit select_metrics params return #' @export #' @keywords metric #' @examples -#' rules_binary() -#' rules_binary(select = "brier_score") -#' rules_binary(exclude = "log_score") -rules_binary <- function(select = NULL, exclude = NULL) { +#' metrics_binary() +#' metrics_binary(select = "brier_score") +#' metrics_binary(exclude = "log_score") +metrics_binary <- function(select = NULL, exclude = NULL) { all <- list( brier_score = brier_score, log_score = logs_binary ) - selected <- select_rules(all, select, exclude) + selected <- select_metrics(all, select, exclude) return(selected) } -#' @title Scoring Rules for Point Forecasts +#' @title Default Metrics And Scoring Rules for Point Forecasts #' @description Helper function that returns a named list of default #' scoring rules suitable for point forecasts. #' @@ -69,26 +69,26 @@ rules_binary <- function(select = NULL, exclude = NULL) { #' - "ae_point" = [ae()][Metrics::ae()] #' - "se_point" = [se()][Metrics::se()] #' - "ape" = [ape()][Metrics::ape()] -#' @inherit select_rules params return +#' @inherit select_metrics params return #' @export #' @keywords metric #' @examples -#' rules_point() -#' rules_point(select = "ape") -rules_point <- function(select = NULL, exclude = NULL) { +#' metrics_point() +#' metrics_point(select = "ape") +metrics_point <- function(select = NULL, exclude = NULL) { all <- list( ae_point = Metrics::ae, se_point = Metrics::se, ape = Metrics::ape ) - selected <- select_rules(all, select, exclude) + selected <- select_metrics(all, select, exclude) return(selected) } -#' @title Scoring Rules for Sample-Based Forecasts +#' @title Default Metrics And Scoring Rules for Sample-Based Forecasts #' @description Helper function that returns a named list of default -#' scoring rules suitable for forecasts in a sample-based format +#' scoring rules 
suitable for forecasts in a sample-based format. #' #' The default scoring rules are: #' - "mad" = [mad_sample()] @@ -99,13 +99,13 @@ rules_point <- function(select = NULL, exclude = NULL) { #' - "mad" = [mad_sample()] #' - "ae_median" = [ae_median_sample()] #' - "se_mean" = [se_mean_sample()] -#' @inherit select_rules params return +#' @inherit select_metrics params return #' @export #' @keywords metric #' @examples -#' rules_sample() -#' rules_sample(select = "mad") -rules_sample <- function(select = NULL, exclude = NULL) { +#' metrics_sample() +#' metrics_sample(select = "mad") +metrics_sample <- function(select = NULL, exclude = NULL) { all <- list( bias = bias_sample, dss = dss_sample, @@ -115,14 +115,14 @@ rules_sample <- function(select = NULL, exclude = NULL) { ae_median = ae_median_sample, se_mean = se_mean_sample ) - selected <- select_rules(all, select, exclude) + selected <- select_metrics(all, select, exclude) return(selected) } -#' @title Scoring Rules for Quantile-Based Forecasts +#' @title Default Metrics And Scoring Rules for Quantile-Based Forecasts #' @description Helper function that returns a named list of default -#' scoring rules suitable for forecasts in a quantile-based format +#' scoring rules suitable for forecasts in a quantile-based format. #' #' The default scoring rules are: #' - "wis" = [wis] @@ -144,13 +144,13 @@ rules_sample <- function(select = NULL, exclude = NULL) { #' accept get passed on to it. `interval_range = 90` is set in the function definition, #' as passing an argument `interval_range = 90` to [score()] would mean it would also #' get passed to `interval_coverage_50`. 
-#' @inherit select_rules params return +#' @inherit select_metrics params return #' @export #' @keywords metric #' @examples -#' rules_quantile() -#' rules_quantile(select = "wis") -rules_quantile <- function(select = NULL, exclude = NULL) { +#' metrics_quantile() +#' metrics_quantile(select = "wis") +metrics_quantile <- function(select = NULL, exclude = NULL) { all <- list( wis = wis, overprediction = overprediction, @@ -164,6 +164,6 @@ rules_quantile <- function(select = NULL, exclude = NULL) { interval_coverage_deviation = interval_coverage_deviation, ae_median = ae_median_quantile ) - selected <- select_rules(all, select, exclude) + selected <- select_metrics(all, select, exclude) return(selected) } diff --git a/R/get_-functions.R b/R/get_-functions.R index 4da769e73..2a9396924 100644 --- a/R/get_-functions.R +++ b/R/get_-functions.R @@ -130,7 +130,7 @@ get_type <- function(x) { } -#' @title Get Names Of The Scoring Rules That Were Used For Scoring +#' @title Get Names Of The Metrics That Were Used For Scoring #' @description #' When applying a scoring rule via [score()], the names of the scoring rules #' become column names of the diff --git a/R/score.R b/R/score.R index d7bd36f0d..f5efd4e2b 100644 --- a/R/score.R +++ b/R/score.R @@ -16,8 +16,8 @@ #' @param data A forecast object (a validated data.table with predicted and #' observed values, see [as_forecast()]) #' @param metrics A named list of scoring functions. Names will be used as -#' column names in the output. See [rules_point()], [rules_binary()], -#' [rules_quantile()], and [rules_sample()] for more information on the +#' column names in the output. See [metrics_point()], [metrics_binary()], +#' [metrics_quantile()], and [metrics_sample()] for more information on the #' default metrics used. #' @param ... additional arguments #' @return An object of class `scores`. This object is a data.table with @@ -81,13 +81,13 @@ score.default <- function(data, metrics, ...) 
{ #' @importFrom data.table setattr copy #' @rdname score #' @export -score.forecast_binary <- function(data, metrics = rules_binary(), ...) { +score.forecast_binary <- function(data, metrics = metrics_binary(), ...) { data <- copy(data) suppressWarnings(suppressMessages(validate_forecast(data))) data <- na.omit(data) metrics <- validate_metrics(metrics) - scores <- apply_rules( + scores <- apply_metrics( data, metrics, data$observed, data$predicted, ... ) @@ -102,13 +102,13 @@ score.forecast_binary <- function(data, metrics = rules_binary(), ...) { #' @importFrom data.table setattr copy #' @rdname score #' @export -score.forecast_point <- function(data, metrics = rules_point(), ...) { +score.forecast_point <- function(data, metrics = metrics_point(), ...) { data <- copy(data) suppressWarnings(suppressMessages(validate_forecast(data))) data <- na.omit(data) metrics <- validate_metrics(metrics) - scores <- apply_rules( + scores <- apply_metrics( data, metrics, data$observed, data$predicted, ... ) @@ -121,7 +121,7 @@ score.forecast_point <- function(data, metrics = rules_point(), ...) { #' @importFrom data.table setattr copy #' @rdname score #' @export -score.forecast_sample <- function(data, metrics = rules_sample(), ...) { +score.forecast_sample <- function(data, metrics = metrics_sample(), ...) { data <- copy(data) suppressWarnings(suppressMessages(validate_forecast(data))) data <- na.omit(data) @@ -144,7 +144,7 @@ score.forecast_sample <- function(data, metrics = rules_sample(), ...) { predicted <- do.call(rbind, data$predicted) data[, c("observed", "predicted", "scoringutils_N") := NULL] - data <- apply_rules( + data <- apply_metrics( data, metrics, observed, predicted, ... ) @@ -160,7 +160,7 @@ score.forecast_sample <- function(data, metrics = rules_sample(), ...) { #' @importFrom data.table `:=` as.data.table rbindlist %like% setattr copy #' @rdname score #' @export -score.forecast_quantile <- function(data, metrics = rules_quantile(), ...) 
{ +score.forecast_quantile <- function(data, metrics = metrics_quantile(), ...) { data <- copy(data) suppressWarnings(suppressMessages(validate_forecast(data))) data <- na.omit(data) @@ -190,7 +190,7 @@ score.forecast_quantile <- function(data, metrics = rules_quantile(), ...) { "observed", "predicted", "quantile_level", "scoringutils_quantile_level" ) := NULL] - data <- apply_rules( + data <- apply_metrics( data, metrics, observed, predicted, quantile_level, ... ) @@ -206,7 +206,7 @@ score.forecast_quantile <- function(data, metrics = rules_quantile(), ...) { #' @title Apply A List Of Functions To A Data Table Of Forecasts #' @description This helper function applies scoring rules (stored as a list of -#' functions) to a data table of forecasts. `apply_rules` is used within +#' functions) to a data table of forecasts. `apply_metrics` is used within #' `score()` to apply all scoring rules to the data. #' Scoring rules are wrapped in [run_safely()] to catch errors and to make #' sure that only arguments are passed to the scoring rule that are actually @@ -214,7 +214,7 @@ score.forecast_quantile <- function(data, metrics = rules_quantile(), ...) { #' @inheritParams score #' @return A data table with the forecasts and the calculated metrics #' @keywords internal -apply_rules <- function(data, metrics, ...) { +apply_metrics <- function(data, metrics, ...) 
{ expr <- expression( data[, (metric_name) := do.call(run_safely, list(..., fun = fun))] ) diff --git a/R/z_globalVariables.R b/R/z_globalVariables.R index f4b4c6ff3..d2037617d 100644 --- a/R/z_globalVariables.R +++ b/R/z_globalVariables.R @@ -44,10 +44,10 @@ globalVariables(c( "metric", "metrics_select", "metrics", - "rules_binary", - "rules_point", - "rules_quantile", - "rules_sample", + "metrics_binary", + "metrics_point", + "metrics_quantile", + "metrics_sample", "model", "n_obs", "n_obs wis_component_name", diff --git a/man/apply_rules.Rd b/man/apply_metrics.Rd similarity index 67% rename from man/apply_rules.Rd rename to man/apply_metrics.Rd index 350dcb2f4..ff876a71d 100644 --- a/man/apply_rules.Rd +++ b/man/apply_metrics.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/score.R -\name{apply_rules} -\alias{apply_rules} +\name{apply_metrics} +\alias{apply_metrics} \title{Apply A List Of Functions To A Data Table Of Forecasts} \usage{ -apply_rules(data, metrics, ...) +apply_metrics(data, metrics, ...) } \arguments{ \item{data}{A forecast object (a validated data.table with predicted and observed values, see \code{\link[=as_forecast]{as_forecast()}})} \item{metrics}{A named list of scoring functions. Names will be used as -column names in the output. See \code{\link[=rules_point]{rules_point()}}, \code{\link[=rules_binary]{rules_binary()}}, -\code{\link[=rules_quantile]{rules_quantile()}}, and \code{\link[=rules_sample]{rules_sample()}} for more information on the +column names in the output. 
See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, +\code{\link[=metrics_quantile]{metrics_quantile()}}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the default metrics used.} \item{...}{additional arguments} @@ -22,7 +22,7 @@ A data table with the forecasts and the calculated metrics } \description{ This helper function applies scoring rules (stored as a list of -functions) to a data table of forecasts. \code{apply_rules} is used within +functions) to a data table of forecasts. \code{apply_metrics} is used within \code{score()} to apply all scoring rules to the data. Scoring rules are wrapped in \code{\link[=run_safely]{run_safely()}} to catch errors and to make sure that only arguments are passed to the scoring rule that are actually diff --git a/man/get_metrics.Rd b/man/get_metrics.Rd index 9eaf14c68..5ce904a66 100644 --- a/man/get_metrics.Rd +++ b/man/get_metrics.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_-functions.R \name{get_metrics} \alias{get_metrics} -\title{Get Names Of The Scoring Rules That Were Used For Scoring} +\title{Get Names Of The Metrics That Were Used For Scoring} \usage{ get_metrics(scores, error = FALSE) } diff --git a/man/rules_binary.Rd b/man/metrics_binary.Rd similarity index 75% rename from man/rules_binary.Rd rename to man/metrics_binary.Rd index f5c455e35..5203bb9e4 100644 --- a/man/rules_binary.Rd +++ b/man/metrics_binary.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/default-scoring-rules.R -\name{rules_binary} -\alias{rules_binary} -\title{Scoring Rules for Binary Forecasts} +\name{metrics_binary} +\alias{metrics_binary} +\title{Default Metrics And Scoring Rules for Binary Forecasts} \usage{ -rules_binary(select = NULL, exclude = NULL) +metrics_binary(select = NULL, exclude = NULL) } \arguments{ \item{select}{A character vector of scoring rules to select from the list. 
@@ -27,8 +27,8 @@ The default scoring rules are: } } \examples{ -rules_binary() -rules_binary(select = "brier_score") -rules_binary(exclude = "log_score") +metrics_binary() +metrics_binary(select = "brier_score") +metrics_binary(exclude = "log_score") } \keyword{metric} diff --git a/man/rules_point.Rd b/man/metrics_point.Rd similarity index 79% rename from man/rules_point.Rd rename to man/metrics_point.Rd index 55f562dc9..afcd30e9a 100644 --- a/man/rules_point.Rd +++ b/man/metrics_point.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/default-scoring-rules.R -\name{rules_point} -\alias{rules_point} -\title{Scoring Rules for Point Forecasts} +\name{metrics_point} +\alias{metrics_point} +\title{Default Metrics And Scoring Rules for Point Forecasts} \usage{ -rules_point(select = NULL, exclude = NULL) +metrics_point(select = NULL, exclude = NULL) } \arguments{ \item{select}{A character vector of scoring rules to select from the list. @@ -28,7 +28,7 @@ The default scoring rules are: } } \examples{ -rules_point() -rules_point(select = "ape") +metrics_point() +metrics_point(select = "ape") } \keyword{metric} diff --git a/man/rules_quantile.Rd b/man/metrics_quantile.Rd similarity index 87% rename from man/rules_quantile.Rd rename to man/metrics_quantile.Rd index f489a65c6..2bcdd1c4e 100644 --- a/man/rules_quantile.Rd +++ b/man/metrics_quantile.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/default-scoring-rules.R -\name{rules_quantile} -\alias{rules_quantile} -\title{Scoring Rules for Quantile-Based Forecasts} +\name{metrics_quantile} +\alias{metrics_quantile} +\title{Default Metrics And Scoring Rules for Quantile-Based Forecasts} \usage{ -rules_quantile(select = NULL, exclude = NULL) +metrics_quantile(select = NULL, exclude = NULL) } \arguments{ \item{select}{A character vector of scoring rules to select from the list. @@ -18,7 +18,7 @@ A list of scoring rules. 
} \description{ Helper function that returns a named list of default -scoring rules suitable for forecasts in a quantile-based format +scoring rules suitable for forecasts in a quantile-based format. The default scoring rules are: \itemize{ @@ -44,7 +44,7 @@ as passing an argument \code{interval_range = 90} to \code{\link[=score]{score() get passed to \code{interval_coverage_50}. } \examples{ -rules_quantile() -rules_quantile(select = "wis") +metrics_quantile() +metrics_quantile(select = "wis") } \keyword{metric} diff --git a/man/rules_sample.Rd b/man/metrics_sample.Rd similarity index 80% rename from man/rules_sample.Rd rename to man/metrics_sample.Rd index 0a6399beb..346f44686 100644 --- a/man/rules_sample.Rd +++ b/man/metrics_sample.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/default-scoring-rules.R -\name{rules_sample} -\alias{rules_sample} -\title{Scoring Rules for Sample-Based Forecasts} +\name{metrics_sample} +\alias{metrics_sample} +\title{Default Metrics And Scoring Rules for Sample-Based Forecasts} \usage{ -rules_sample(select = NULL, exclude = NULL) +metrics_sample(select = NULL, exclude = NULL) } \arguments{ \item{select}{A character vector of scoring rules to select from the list. @@ -18,7 +18,7 @@ A list of scoring rules. } \description{ Helper function that returns a named list of default -scoring rules suitable for forecasts in a sample-based format +scoring rules suitable for forecasts in a sample-based format. The default scoring rules are: \itemize{ @@ -33,7 +33,7 @@ The default scoring rules are: } } \examples{ -rules_sample() -rules_sample(select = "mad") +metrics_sample() +metrics_sample(select = "mad") } \keyword{metric} diff --git a/man/score.Rd b/man/score.Rd index 94b415644..6096199d7 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -10,21 +10,21 @@ \usage{ score(data, metrics, ...) -\method{score}{forecast_binary}(data, metrics = rules_binary(), ...) 
+\method{score}{forecast_binary}(data, metrics = metrics_binary(), ...) -\method{score}{forecast_point}(data, metrics = rules_point(), ...) +\method{score}{forecast_point}(data, metrics = metrics_point(), ...) -\method{score}{forecast_sample}(data, metrics = rules_sample(), ...) +\method{score}{forecast_sample}(data, metrics = metrics_sample(), ...) -\method{score}{forecast_quantile}(data, metrics = rules_quantile(), ...) +\method{score}{forecast_quantile}(data, metrics = metrics_quantile(), ...) } \arguments{ \item{data}{A forecast object (a validated data.table with predicted and observed values, see \code{\link[=as_forecast]{as_forecast()}})} \item{metrics}{A named list of scoring functions. Names will be used as -column names in the output. See \code{\link[=rules_point]{rules_point()}}, \code{\link[=rules_binary]{rules_binary()}}, -\code{\link[=rules_quantile]{rules_quantile()}}, and \code{\link[=rules_sample]{rules_sample()}} for more information on the +column names in the output. 
See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, +\code{\link[=metrics_quantile]{metrics_quantile()}}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the default metrics used.} \item{...}{additional arguments} diff --git a/man/select_rules.Rd b/man/select_metrics.Rd similarity index 65% rename from man/select_rules.Rd rename to man/select_metrics.Rd index 605d7176b..da1ecf5f4 100644 --- a/man/select_rules.Rd +++ b/man/select_metrics.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/default-scoring-rules.R -\name{select_rules} -\alias{select_rules} -\title{Select Scoring Rules From A List of Possible Scoring Rules} +\name{select_metrics} +\alias{select_metrics} +\title{Select Metrics From A List of Functions} \usage{ -select_rules(rules, select = NULL, exclude = NULL) +select_metrics(metrics, select = NULL, exclude = NULL) } \arguments{ -\item{rules}{A list of scoring rules.} +\item{metrics}{A list of scoring functions.} \item{select}{A character vector of scoring rules to select from the list. If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} @@ -20,15 +20,15 @@ A list of scoring rules. } \description{ Helper function to return only the scoring rules selected by -the user from a list of possible scoring rules. +the user from a list of possible functions. 
} \examples{ -select_rules( - rules = rules_binary(), +select_metrics( + metrics = metrics_binary(), select = "brier_score" ) -select_rules( - rules = rules_binary(), +select_metrics( + metrics = metrics_binary(), exclude = "log_score" ) } diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 2c5f7c91b..47131b862 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -4,11 +4,11 @@ library(data.table) suppressMessages(library(magrittr)) data.table::setDTthreads(2) # restricts number of cores used on CRAN -metrics_no_cov <- rules_quantile( +metrics_no_cov <- metrics_quantile( exclude = c("interval_coverage_50", "interval_coverage_90", "interval_coverage_deviation") ) -metrics_no_cov_no_ae <- rules_quantile( +metrics_no_cov_no_ae <- metrics_quantile( exclude = c("interval_coverage_50", "interval_coverage_90", "interval_coverage_deviation", "ae_median") ) diff --git a/tests/testthat/test-default-scoring-rules.R b/tests/testthat/test-default-scoring-rules.R index a53dcbe00..0ad5cc0de 100644 --- a/tests/testthat/test-default-scoring-rules.R +++ b/tests/testthat/test-default-scoring-rules.R @@ -1,35 +1,35 @@ -test_that("`select_rules` works as expected", { +test_that("`select_metrics` works as expected", { expect_equal( - scoringutils:::select_rules(rules_point(), select = NULL), - rules_point() + scoringutils:::select_metrics(metrics_point(), select = NULL), + metrics_point() ) expect_equal( - scoringutils:::select_rules(rules_point(), select = NULL), - scoringutils:::select_rules(rules_point()) + scoringutils:::select_metrics(metrics_point(), select = NULL), + scoringutils:::select_metrics(metrics_point()) ) expect_equal( - names(scoringutils:::select_rules(rules_point(), select = "ape")), + names(scoringutils:::select_metrics(metrics_point(), select = "ape")), "ape" ) expect_equal( - length(scoringutils:::select_rules(rules_point(), select = NULL, exclude = "ape")), - length(rules_point()) - 1 + 
length(scoringutils:::select_metrics(metrics_point(), select = NULL, exclude = "ape")), + length(metrics_point()) - 1 ) # if both select and exclude are specified, exclude is ignored expect_equal( - names(scoringutils:::select_rules(rules_point(), select = "ape", exclude = "ape")), + names(scoringutils:::select_metrics(metrics_point(), select = "ape", exclude = "ape")), "ape" ) # expect error if possibilities is not a list expect_error( - scoringutils:::select_rules(rules_point, select = NULL), - "Assertion on 'rules' failed: Must be of type 'list', not 'closure'." + scoringutils:::select_metrics(metrics_point, select = NULL), + "Assertion on 'metrics' failed: Must be of type 'list', not 'closure'." ) }) @@ -38,38 +38,38 @@ test_that("default rules work as expected", { expect_true( all(c( - is.list(rules_point()), - is.list(rules_binary()), - is.list(rules_quantile()), - is.list(rules_sample())) + is.list(metrics_point()), + is.list(metrics_binary()), + is.list(metrics_quantile()), + is.list(metrics_sample())) ) ) expect_equal( - names(rules_point(select = "ape")), + names(metrics_point(select = "ape")), "ape" ) expect_equal( - length(rules_binary(select = NULL, exclude = "brier_score")), - length(rules_binary()) - 1 + length(metrics_binary(select = NULL, exclude = "brier_score")), + length(metrics_binary()) - 1 ) # if both select and exclude are specified, exclude is ignored expect_equal( - names(scoringutils:::select_rules(rules_quantile(), select = "wis", exclude = "wis")), + names(scoringutils:::select_metrics(metrics_quantile(), select = "wis", exclude = "wis")), "wis" ) # expect error if select is not included in the default possibilities expect_error( - rules_sample(select = "not-included"), + metrics_sample(select = "not-included"), "Must be a subset of" ) # expect error if exclude is not included in the default possibilities expect_error( - rules_quantile(exclude = "not-included"), + metrics_quantile(exclude = "not-included"), "Must be a subset of" ) }) 
diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index f4f40b26c..f569ad6d3 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -189,19 +189,19 @@ test_that("function produces output for a point case", { ) expect_equal( colnames(eval), - c("model", "target_type", names(rules_point())) + c("model", "target_type", names(metrics_point())) ) }) test_that("Changing metrics names works", { - metrics_test <- rules_point() + metrics_test <- metrics_point() names(metrics_test)[1] = "just_testing" eval <- suppressMessages(score(as_forecast(example_point), metrics = metrics_test)) eval_summarised <- summarise_scores(eval, by = "model") expect_equal( colnames(eval_summarised), - c("model", "just_testing", names(rules_point())[-1]) + c("model", "just_testing", names(metrics_point())[-1]) ) }) @@ -225,7 +225,7 @@ test_that("score_quantile correctly handles separate results = FALSE", { nrow(eval) > 1, TRUE ) - expect_true(all(names(rules_quantile()) %in% colnames(eval))) + expect_true(all(names(metrics_quantile()) %in% colnames(eval))) }) @@ -329,13 +329,13 @@ test_that("function throws an error if data is missing", { }) # ============================================================================= -# `apply_rules()` +# `apply_metrics()` # ============================================================================= -test_that("apply_rules() works", { +test_that("apply_metrics() works", { dt <- data.table::data.table(x = 1:10) - scoringutils:::apply_rules( + scoringutils:::apply_metrics( data = dt, metrics = list("test" = function(x) x + 1), dt$x ) @@ -343,7 +343,7 @@ test_that("apply_rules() works", { # additional named argument works expect_no_condition( - scoringutils:::apply_rules( + scoringutils:::apply_metrics( data = dt, metrics = list("test" = function(x) x + 1), dt$x, y = dt$test) ) @@ -351,7 +351,7 @@ test_that("apply_rules() works", { # additional unnamed argument does not work expect_warning( - 
scoringutils:::apply_rules( + scoringutils:::apply_metrics( data = dt, metrics = list("test" = function(x) x + 1), dt$x, dt$test) )