Merge pull request #48 from mlr-org/doku_update
Update documentation of measures
be-marc authored Aug 5, 2024
2 parents d25e6ea + 242a097 commit 787c584
Showing 37 changed files with 116 additions and 87 deletions.
10 changes: 4 additions & 6 deletions R/binary_bbrier.R
@@ -2,14 +2,12 @@
#'
#' @details
#' The Binary Brier Score is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i (I_i - p_i)^2.
#' \frac{1}{n} \sum_{i=1}^n w_i (I_i - p_i)^2,
#' }{
#' weighted.mean(((t == positive) - p)^2, w).
#' }
#' \if{latex}{
#' \eqn{w_i} are the sample weights,
#' \eqn{I_{i}}{I_i} is 1 if observation \eqn{i} belongs to the positive class, and 0 otherwise.
#' weighted.mean(((t == positive) - p)^2, w),
#' }
#' where \eqn{w_i} are the sample weights,
#' and \eqn{I_{i}} is 1 if observation \eqn{x_i} belongs to the positive class, and 0 otherwise.
#'
#' Note that this (more common) definition of the Brier score is equivalent to the
#' original definition of the multi-class Brier score (see [mbrier()]) divided by 2.
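As a quick illustration, the weighted definition above can be sketched in plain R. This is a hypothetical helper, not the package's exported `bbrier()`:

```r
# Sketch of the weighted Binary Brier Score from the definition above.
# truth: factor/character vector; prob: predicted probability of the positive class.
bbrier_sketch <- function(truth, prob, positive, w = rep(1, length(prob))) {
  I <- as.integer(truth == positive)  # I_i: 1 if observation i is positive, else 0
  weighted.mean((I - prob)^2, w)      # matches weighted.mean(((t == positive) - p)^2, w)
}

bbrier_sketch(c("a", "b"), c(0.5, 0.5), positive = "a")  # 0.25
```

Note that `weighted.mean()` normalizes by `sum(w)`, which matches the ascii rendering of the formula.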
2 changes: 1 addition & 1 deletion R/binary_fn.R
@@ -3,7 +3,7 @@
#' @details
#' This measure counts the false negatives (type 2 error), i.e. the number of
#' predictions indicating a negative class label while in fact it is positive.
#' This is sometimes also called a "false alarm".
#' This is sometimes also called a "miss" or an "underestimation".
#'
#' @templateVar mid fn
#' @template binary_template
1 change: 1 addition & 0 deletions R/binary_fp.R
@@ -3,6 +3,7 @@
#' @details
#' This measure counts the false positives (type 1 error), i.e. the number of
#' predictions indicating a positive class label while in fact it is negative.
#' This is sometimes also called a "false alarm".
#'
#' @templateVar mid fp
#' @template binary_template
2 changes: 1 addition & 1 deletion R/binary_gmean.R
@@ -2,7 +2,7 @@
#'
#' @details
#' Calculates the geometric mean of [recall()] R and [specificity()] S as \deqn{
#' \sqrt{\mathrm{R} \mathrm{S}}.
#' \sqrt{\mathrm{R} \cdot \mathrm{S}}.
#' }{
#' sqrt(R * S)
#' }
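In R this reduces to a one-liner; an illustrative sketch with recall and specificity supplied directly:

```r
# G-mean from precomputed recall (R) and specificity (S)
gmean_sketch <- function(recall, specificity) sqrt(recall * specificity)

gmean_sketch(1, 0.25)  # 0.5
```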
2 changes: 1 addition & 1 deletion R/binary_gpr.R
@@ -2,7 +2,7 @@
#'
#' @details
#' Calculates the geometric mean of [precision()] P and [recall()] R as \deqn{
#' \sqrt{\mathrm{P} \mathrm{R}}.
#' \sqrt{\mathrm{P} \cdot \mathrm{R}}.
#' }{
#' sqrt(P * R)
#' }
2 changes: 1 addition & 1 deletion R/binary_prauc.R
@@ -4,7 +4,7 @@
#' Computes the area under the Precision-Recall curve (PRC).
#' The PRC can be interpreted as the relationship between precision and recall (sensitivity),
#' and is considered to be a more appropriate measure for unbalanced datasets than the ROC curve.
#' The PRC is computed by integration of the piecewise function.
#' The AUC-PRC is computed by integration of the piecewise function.
#'
#' @templateVar mid prauc
#' @template binary_template
2 changes: 1 addition & 1 deletion R/binary_tn.R
@@ -2,7 +2,7 @@
#'
#' @details
#' This measure counts the true negatives, i.e. the number of
#' predictions correctly indicating a negative class label.
#' predictions correctly indicating a negative class label. This is sometimes also called a "correct rejection".
#'
#' @templateVar mid tn
#' @template binary_template
2 changes: 1 addition & 1 deletion R/binary_tnr.R
@@ -6,7 +6,7 @@
#' }{
#' TN / (FP + TN).
#' }
#' Also know as "specificity".
#' Also known as "specificity" or "selectivity".
#'
#' @templateVar mid tnr
#' @template binary_template
2 changes: 1 addition & 1 deletion R/binary_tp.R
@@ -2,7 +2,7 @@
#'
#' @details
#' This measure counts the true positives, i.e. the number of
#' predictions correctly indicating a positive class label.
#' predictions correctly indicating a positive class label. This is sometimes also called a "hit".
#'
#' @templateVar mid tp
#' @template binary_template
2 changes: 1 addition & 1 deletion R/binary_tpr.R
@@ -6,7 +6,7 @@
#' }{
#' TP / (TP + FN).
#' }
#' Also know as "recall" or "sensitivity".
#' This is also known as "recall", "sensitivity", or "probability of detection".
#'
#' @details
#' This measure is undefined if TP + FN = 0.
10 changes: 5 additions & 5 deletions R/classif_acc.R
@@ -1,11 +1,11 @@
#' @title Classification Accuracy
#'
#' @details
#' The Classification Accuracy is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left( t_i = r_i \right).
#' }{
#' weighted.mean(t == r, w).
#' }
#' The Classification Accuracy is defined as
#' \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \mathbf{1} \left( t_i = r_i \right),
#' }{1 / n * sum(wi * 1(ti == ri))}
#' where \eqn{w_i} are normalized weights for all observations \eqn{x_i}.
#'
#' @templateVar mid acc
#' @template classif_template
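The weighted definition above maps directly onto base R; a minimal sketch (hypothetical helper, not the exported `acc()`):

```r
# Weighted classification accuracy: (1/n) * sum of w_i * 1(t_i = r_i),
# with weights normalized by weighted.mean()
acc_sketch <- function(truth, response, w = rep(1, length(truth))) {
  weighted.mean(truth == response, w)
}

acc_sketch(c("a", "a", "b"), c("a", "b", "b"))  # 2/3
```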
9 changes: 5 additions & 4 deletions R/classif_bacc.R
@@ -4,18 +4,19 @@
#' The Balanced Accuracy computes the weighted balanced accuracy, suitable for imbalanced data sets.
#' It is defined analogously to the definition in [sklearn](https://scikit-learn.org/).
#'
#' First, the sample weights \eqn{w} are normalized per class:
#' First, all sample weights \eqn{w_i} are normalized per class so that each class has the same influence:
#' \deqn{
#' \hat{w}_i = \frac{w_i}{\sum_j 1(y_j = y_i) w_i}.
#' \hat{w}_i = \frac{w_i}{\sum_{j=1}^n w_j \cdot \mathbf{1}(t_j = t_i)}.
#' }{
#' w_hat[i] = w[i] / sum((t == t[i]) * w).
#' }
#' The balanced accuracy is calculated as
#' The Balanced Accuracy is then calculated as
#' \deqn{
#' \frac{1}{\sum_i \hat{w}_i} \sum_i 1(r_i = t_i) \hat{w}_i.
#' \frac{1}{\sum_{i=1}^n \hat{w}_i} \sum_{i=1}^n \hat{w}_i \cdot \mathbf{1}(r_i = t_i).
#' }{
#' 1 / sum(w_hat) * sum((r == t) * w_hat).
#' }
#' This definition is equivalent to [acc()] with class-balanced sample weights.
#'
#' @references
#' `r format_bib("brodersen_2010", "guyon_2015")`
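The two-step definition can be sketched directly in R (a hypothetical helper, not the exported `bacc()`):

```r
bacc_sketch <- function(truth, response, w = rep(1, length(truth))) {
  # step 1: normalize weights per class so every class has the same total influence
  class_sums <- vapply(truth, function(t) sum(w[truth == t]), numeric(1))
  w_hat <- w / class_sums
  # step 2: weighted accuracy with the class-normalized weights
  sum(w_hat * (truth == response)) / sum(w_hat)
}

# one of two "a" correct, the single "b" correct:
# equals the mean of per-class recalls, (0.5 + 1) / 2 = 0.75
bacc_sketch(c("a", "a", "b"), c("a", "b", "b"))
```

With unit weights this reduces to the unweighted balanced accuracy, i.e. the macro-average of per-class recalls.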
5 changes: 3 additions & 2 deletions R/classif_ce.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Classification Error is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left( t_i \neq r_i \right).
#' \frac{1}{n} \sum_{i=1}^n w_i \mathbf{1} \left( t_i \neq r_i \right),
#' }{
#' weighted.mean(t != r, w).
#' 1 / n * sum(wi * 1(ti != ri)),
#' }
#' where \eqn{w_i} are normalized weights for each observation \eqn{x_i}.
#'
#' @templateVar mid ce
#' @template classif_template
5 changes: 3 additions & 2 deletions R/classif_logloss.R
@@ -1,12 +1,13 @@
#' @title Log Loss
#'
#' @details
#' The Log Loss is defined as \deqn{
#' The Log Loss (a.k.a. Bernoulli Loss, Logistic Loss, Cross-Entropy Loss) is defined as
#' \deqn{
#' -\frac{1}{n} \sum_{i=1}^n w_i \log \left( p_i \right )
#' }{
#' -weighted.mean(log(p), w)
#' }
#' where \eqn{p_i}{p} is the probability for the true class of observation \eqn{i}.
#' where \eqn{p_i}{p} is the probability for the true class of observation \eqn{x_i}, and \eqn{w_i} are normalized weights for each observation.
#'
#' @templateVar mid logloss
#' @template classif_template
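Given the probabilities assigned to each observation's true class, the definition is a one-liner in R (illustrative sketch, not the exported `logloss()`):

```r
# Weighted log loss; p_true is the predicted probability of each
# observation's true class
logloss_sketch <- function(p_true, w = rep(1, length(p_true))) {
  -weighted.mean(log(p_true), w)
}

logloss_sketch(c(1, 1))      # 0: fully confident, correct predictions
logloss_sketch(c(0.5, 0.5))  # log(2), about 0.693
```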
7 changes: 4 additions & 3 deletions R/classif_mbrier.R
@@ -1,12 +1,13 @@
#' @title Multiclass Brier Score
#'
#' @details
#' Brier score for multi-class classification problems with \eqn{r} labels defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n \sum_{j=1}^r (I_{ij} - p_{ij})^2.
#' Brier score for multi-class classification problems with \eqn{k} labels defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n \sum_{j=1}^k (I_{ij} - p_{ij})^2.
#' }{
#' 1/n * sum_i sum_j (I_ij - p_ij)^2.
#' }
#' \eqn{I_{ij}}{I_ij} is 1 if observation \eqn{i} has true label \eqn{j}, and 0 otherwise.
#' \eqn{I_{ij}}{I_ij} is 1 if observation \eqn{x_i} has true label \eqn{j}, and 0 otherwise.
#' \eqn{p_{ij}}{p_ij} is the probability that observation \eqn{x_i} belongs to class \eqn{j}.
#'
#' Note that there also is the more common definition of the Brier score for binary
#' classification problems in [bbrier()].
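The double sum above can be sketched with a one-hot indicator matrix. A hypothetical helper (not the exported `mbrier()`), assuming `truth` is encoded as integer class indices:

```r
# Multiclass Brier score; prob is an n x k probability matrix,
# truth an integer vector of true class indices (assumed encoding)
mbrier_sketch <- function(truth, prob) {
  I <- matrix(0, nrow(prob), ncol(prob))
  I[cbind(seq_len(nrow(prob)), truth)] <- 1  # one-hot indicator I_ij
  mean(rowSums((I - prob)^2))                # (1/n) sum_i sum_j (I_ij - p_ij)^2
}

mbrier_sketch(c(1, 2), rbind(c(1, 0), c(0, 1)))  # 0 for perfect predictions
```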
5 changes: 3 additions & 2 deletions R/classif_mcc.R
@@ -8,7 +8,7 @@
#' }
#' where \eqn{TP}, \eqn{FP}, \eqn{TN}, \eqn{FN} are the number of true positives, false positives, true negatives, and false negatives respectively.
#'
#' In the multi-class case, the Matthews Correlation Coefficient defined for a multi-class confusion matrix \eqn{C} with \eqn{K} classes: \deqn{
#' In the multi-class case, the Matthews Correlation Coefficient is defined for a multi-class confusion matrix \eqn{C} with \eqn{K} classes: \deqn{
#' \frac{c \cdot s - \sum_k^K p_k \cdot t_k}{\sqrt{(s^2 - \sum_k^K p_k^2) \cdot (s^2 - \sum_k^K t_k^2)}},
#' }{
#' (c * s - sum(pk * tk)) / sqrt((s^2 - sum(pk^2)) * (s^2 - sum(tk^2))),
@@ -23,8 +23,9 @@
#' @template classif_template
#'
#' @details
#' The above formula is undefined if any of the four sums in the denominator is 0 in the binary case and more generally if either \eqn{s^2 - sum(pk^2)} or \eqn{s^2 - sum(tk^2)} is equal to 0.
#' The above formula is undefined if any of the four sums in the denominator is 0 in the binary case, and more generally if either \eqn{s^2 - \sum_k^K p_k^2} or \eqn{s^2 - \sum_k^K t_k^2} is equal to 0.
#' The denominator is then set to 1.
#'
#' When there are more than two classes, the MCC will no longer range between -1 and +1.
#' Instead, the minimum value will be between -1 and 0 depending on the true distribution. The maximum value is always +1.
#'
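The binary formula, including the denominator convention from the details, can be sketched from the confusion-matrix counts (a hypothetical helper, not the exported `mcc()`):

```r
# Binary MCC; the denominator is set to 1 when it would be 0, as described above
mcc_sketch <- function(tp, fp, tn, fn) {
  denom <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
  if (denom == 0) denom <- 1
  (tp * tn - fp * fn) / denom
}

mcc_sketch(tp = 5, fp = 0, tn = 5, fn = 0)  # 1: perfect prediction
```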
12 changes: 8 additions & 4 deletions R/classif_zero_one.R
@@ -1,12 +1,16 @@
#' @title Zero-One Classification Loss (per observation)
#'
#' @description
#' Calculates the per-observation 0/1 loss as \deqn{
#' t_i \neq r_1.
#' Calculates the per-observation 0/1 (zero-one) loss as \deqn{
#' \mathbf{1} (t_i \neq r_i).
#' }{
#' t != r.
#' 1(t != r).
#' }
#' The 1/0 (one-zero) loss is equal to 1 minus the zero-one loss and is calculated as \deqn{
#' \mathbf{1} (t_i = r_i).
#' }{
#' 1(t == r).
#' }
#'
#' @templateVar mid zero_one
#' @template classif_template
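Both per-observation losses are trivial vectorized expressions in R (illustrative sketches):

```r
# Per-observation zero-one and one-zero losses
zero_one_sketch <- function(truth, response) as.integer(truth != response)
one_zero_sketch <- function(truth, response) 1L - zero_one_sketch(truth, response)

zero_one_sketch(c(1, 2, 3), c(1, 9, 3))  # 0 1 0
```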
13 changes: 12 additions & 1 deletion R/confusion_matrix.R
@@ -2,7 +2,18 @@
#'
#' @description
#' Calculates the confusion matrix for a binary classification problem
#' once and then calculates all confusion measures of this package.
#' once and then calculates all binary confusion measures of this package.
#'
#' @details
#' The binary confusion matrix is defined as \deqn{
#' \begin{pmatrix}
#' TP & FP \\
#' FN & TN
#' \end{pmatrix}.
#' }{
#' matrix(TP, FP, FN, TN).
#' }
#' If `relative = TRUE`, all values are divided by \eqn{n}.
#'
#' @inheritParams binary_params
#' @param relative (`logical(1)`)\cr
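The matrix layout above can be reproduced with a few logical counts. A hypothetical sketch (not the package's `confusion_matrix()`, whose exact output format may differ):

```r
# Binary confusion matrix in the (TP FP / FN TN) layout shown above
confusion_sketch <- function(truth, response, positive, relative = FALSE) {
  tp <- sum(response == positive & truth == positive)
  fp <- sum(response == positive & truth != positive)
  fn <- sum(response != positive & truth == positive)
  tn <- sum(response != positive & truth != positive)
  m <- matrix(c(tp, fn, fp, tn), nrow = 2,  # column-wise fill: TP FP / FN TN
              dimnames = list(response = c("pos", "neg"), truth = c("pos", "neg")))
  if (relative) m / length(truth) else m
}

confusion_sketch(c("a", "a", "b"), c("a", "b", "b"), positive = "a")
```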
2 changes: 1 addition & 1 deletion R/regr_ae.R
@@ -1,6 +1,6 @@
#' @title Absolute Error (per observation)
#'
#' @description
#' @details
#' Calculates the per-observation absolute error as \deqn{
#' \left| t_i - r_i \right|.
#' }{
2 changes: 1 addition & 1 deletion R/regr_ape.R
@@ -1,6 +1,6 @@
#' @title Absolute Percentage Error (per observation)
#'
#' @description
#' @details
#' Calculates the per-observation absolute percentage error as \deqn{
#' \left| \frac{ t_i - r_i}{t_i} \right|.
#' }{
5 changes: 3 additions & 2 deletions R/regr_bias.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Bias is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left( t_i - r_i \right).
#' \frac{1}{n} \sum_{i=1}^n w_i \left( t_i - r_i \right),
#' }{
#' weighted.mean(t - r, w).
#' weighted.mean(t - r, w),
#' }
#' where \eqn{w_i} are normalized sample weights.
#' Good predictions score close to 0.
#'
#' @templateVar mid bias
4 changes: 4 additions & 0 deletions R/regr_ktau.R
@@ -2,6 +2,10 @@
#'
#' @details
#' Kendall's tau is defined as Kendall's rank correlation coefficient between truth and response.
#' It is defined as \deqn{
#' \tau = \frac{(\textrm{number of concordant pairs}) - (\textrm{number of discordant pairs})}{\textrm{number of pairs}}.
#' }{
#' t = ((number of concordant pairs) - (number of discordant pairs)) / (number of pairs).
#' }
#' Calls [stats::cor()] with `method` set to `"kendall"`.
#'
#' @templateVar mid ktau
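For intuition, the pair-counting definition can be sketched without `stats::cor()`. A hypothetical helper, assuming no ties:

```r
# Kendall's tau by explicit pair counting (no ties assumed)
ktau_sketch <- function(truth, response) {
  idx <- utils::combn(length(truth), 2)  # all pairs i < j
  s <- sign(truth[idx[1, ]] - truth[idx[2, ]]) *
    sign(response[idx[1, ]] - response[idx[2, ]])  # +1 concordant, -1 discordant
  mean(s)  # (concordant - discordant) / total pairs
}

ktau_sketch(1:4, c(1, 3, 2, 4))  # 2/3, matching stats::cor(method = "kendall")
```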
5 changes: 3 additions & 2 deletions R/regr_mae.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Mean Absolute Error is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left| t_i - r_i \right|.
#' \frac{1}{n} \sum_{i=1}^n w_i \left| t_i - r_i \right|,
#' }{
#' weighted.mean(abs(t - r), w).
#' weighted.mean(abs(t - r), w),
#' }
#' where \eqn{w_i} are normalized sample weights.
#'
#' @templateVar mid mae
#' @template regr_template
5 changes: 3 additions & 2 deletions R/regr_mape.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Mean Absolute Percent Error is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left| \frac{ t_i - r_i}{t_i} \right|.
#' \frac{1}{n} \sum_{i=1}^n w_i \left| \frac{ t_i - r_i}{t_i} \right|,
#' }{
#' weighted.mean(abs((t - r) / t), w).
#' weighted.mean(abs((t - r) / t), w),
#' }
#' where \eqn{w_i} are normalized sample weights.
#'
#' @details
#' This measure is undefined if any element of \eqn{t} is \eqn{0}.
2 changes: 1 addition & 1 deletion R/regr_medae.R
@@ -2,7 +2,7 @@
#'
#' @details
#' The Median Absolute Error is defined as \deqn{
#' \mathop{\mathrm{median}}_i \left| t_i - r_i \right|.
#' \mathop{\mathrm{median}} \left| t_i - r_i \right|.
#' }{
#' median(abs(t - r)).
#' }
2 changes: 1 addition & 1 deletion R/regr_medse.R
@@ -2,7 +2,7 @@
#'
#' @details
#' The Median Squared Error is defined as \deqn{
#' \mathop{\mathrm{median}}_i \left[ \left( t_i - r_i \right)^2 \right].
#' \mathop{\mathrm{median}} \left[ \left( t_i - r_i \right)^2 \right].
#' }{
#' median((t - r)^2).
#' }
5 changes: 3 additions & 2 deletions R/regr_mse.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Mean Squared Error is defined as \deqn{
#' \frac{1}{n} w_i \sum_{i=1}^n \left( t_i - r_i \right)^2.
#' \frac{1}{n} \sum_{i=1}^n w_i \left( t_i - r_i \right)^2,
#' }{
#' weighted.mean((t - r)^2, w).
#' weighted.mean((t - r)^2, w),
#' }
#' where \eqn{w_i} are normalized sample weights.
#'
#' @templateVar mid mse
#' @template regr_template
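The corrected weighted definition matches `weighted.mean()` directly; a minimal sketch (not the exported `mse()`):

```r
# Weighted mean squared error from the definition above
mse_sketch <- function(truth, response, w = rep(1, length(truth))) {
  weighted.mean((truth - response)^2, w)
}

mse_sketch(c(0, 0), c(1, 3))  # (1 + 9) / 2 = 5
```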
9 changes: 4 additions & 5 deletions R/regr_msle.R
@@ -2,17 +2,16 @@
#'
#' @details
#' The Mean Squared Log Error is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \left( \ln (1 + t_i) - \ln (1 + r_i) \right)^2.
#' \frac{1}{n} \sum_{i=1}^n w_i \left( \ln (1 + t_i) - \ln (1 + r_i) \right)^2,
#' }{
#' weighted.mean((log(1 + t) - log(1 + r))^2, weights).
#' weighted.mean((log(1 + t) - log(1 + r))^2, weights),
#' }
#' where \eqn{w_i} are normalized sample weights.
#' This measure is undefined if any element of \eqn{t} or \eqn{r} is less than or equal to \eqn{-1}.
#'
#' @templateVar mid msle
#' @template regr_template
#'
#' @details
#' This measure is undefined if any element of \eqn{t} or \eqn{r} is less than or equal to \eqn{-1}.
#'
#' @inheritParams regr_params
#' @template regr_example
#' @export
5 changes: 3 additions & 2 deletions R/regr_pbias.R
@@ -2,10 +2,11 @@
#'
#' @details
#' The Percent Bias is defined as \deqn{
#' \frac{1}{n} \sum_{i=1}^n w_i \frac{\left( t_i - r_i \right)}{\left| t_i \right|}.
#' \frac{1}{n} \sum_{i=1}^n w_i \frac{\left( t_i - r_i \right)}{\left| t_i \right|},
#' }{
#' weighted.mean((t - r) / abs(t), w).
#' weighted.mean((t - r) / abs(t), w),
#' }
#' where \eqn{w_i} are normalized sample weights.
#' Good predictions score close to 0.
#'
#' @templateVar mid pbias
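The signed, scale-free definition above can be sketched as follows (hypothetical helper, not the exported `pbias()`); note it is undefined when any element of `truth` is 0:

```r
# Weighted percent bias; errors are scaled by |t_i|, so truth must not contain zeros
pbias_sketch <- function(truth, response, w = rep(1, length(truth))) {
  weighted.mean((truth - response) / abs(truth), w)
}

pbias_sketch(c(2, -2), c(1, -1))  # 0: over- and under-prediction cancel out
```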