More snapshots #230

Merged
merged 3 commits on Sep 26, 2024
Changes from all commits
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -18,7 +18,7 @@ URL: https://embed.tidymodels.org, https://github.com/tidymodels/embed
BugReports: https://github.com/tidymodels/embed/issues
Depends:
R (>= 3.6),
recipes (>= 1.0.7)
recipes (>= 1.1.0.9000)
Imports:
glue,
dplyr (>= 1.1.0),
@@ -52,6 +52,8 @@ Suggests:
testthat (>= 3.0.0),
VBsparsePCA,
xgboost
Remotes:
tidymodels/recipes
ByteCompile: true
Config/Needs/website: tidymodels, ggiraph, tidyverse/tidytemplate, reticulate
Config/testthat/edition: 3
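
The new Remotes entry points development tooling at the GitHub version of recipes required by the bumped Depends constraint. A minimal sketch of setting that up locally, assuming the pak package is used (the actual workflow is not specified in this PR):

```r
# Install the development version of recipes declared in Remotes,
# then the rest of embed's development dependencies.
# install.packages("pak")        # if pak is not already available
pak::pak("tidymodels/recipes")   # satisfies recipes (>= 1.1.0.9000)
pak::local_install_dev_deps()    # run from a checkout of embed
```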
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/collapse_cart.md
@@ -1,3 +1,11 @@
# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ames[, -1])
Condition
Error in `step_collapse_cart()`:
! The following required column is missing from `new_data`: MS_SubClass.
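
These "missing non-standard role column" snapshots follow the same pattern across the step types touched in this PR. A rough sketch of the kind of test that produces them, using `step_collapse_cart()` as the example (the exact test code is not shown in this diff, so the role name and selections below are assumptions):

```r
library(recipes)
library(embed)
data(ames, package = "modeldata")

# Give MS_SubClass a non-standard role that is still required at bake time,
# then train a recipe whose step uses that column.
rec <- recipe(Sale_Price ~ MS_SubClass + Neighborhood, data = ames) |>
  step_collapse_cart(MS_SubClass, outcome = vars(Sale_Price)) |>
  update_role(MS_SubClass, new_role = "other") |>
  update_role_requirements(role = "other", bake = TRUE)

rec_trained <- prep(rec)

# Dropping the first column (MS_SubClass) from new_data triggers the error
# recorded above; expect_snapshot() captures it for the _snaps/*.md file.
testthat::expect_snapshot(
  error = TRUE,
  bake(rec_trained, new_data = ames[, -1])
)
```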

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/collapse_stringdist.md
@@ -1,3 +1,11 @@
# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ames[, -1])
Condition
Error in `step_collapse_stringdist()`:
! The following required column is missing from `new_data`: MS_SubClass.

# empty printing

Code
16 changes: 16 additions & 0 deletions tests/testthat/_snaps/discretize_cart.md
@@ -88,6 +88,22 @@
-- Operations
* Discretizing variables using CART: x and z | Trained, weighted

# bake method errors when needed non-standard role columns are missing

Code
rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE)
Condition
Warning:
`step_discretize_cart()` failed to find any meaningful splits for predictor 'z', which will not be binned.

---

Code
bake(rec_trained, new_data = sim_tr_cls[, -1])
Condition
Error in `step_discretize_cart()`:
! The following required column is missing from `new_data`: x.

# empty printing

Code
146 changes: 77 additions & 69 deletions tests/testthat/_snaps/discretize_xgb.md
@@ -4,107 +4,107 @@
xgboost
Output
##### xgb.Booster
raw: 74.2 Kb
call:
xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
watchlist = list(train = .train, test = .test), verbose = 0,
early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
nthread = 1)
params (as set within xgb.train):
eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "binary:logistic", nthread = "1", validate_parameters = "TRUE"
xgb.attributes:
best_iteration, best_msg, best_ntreelimit, best_score, niter
callbacks:
cb.evaluation.log()
cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
verbose = verbose)
# of features: 13
niter: 96
best_iteration : 86
best_ntreelimit : 86
best_score : 0.4421503
best_msg : [86] train-logloss:0.417583 test-logloss:0.442150
nfeatures : 13
evaluation_log:
iter train_logloss test_logloss
<num> <num> <num>
1 0.6279229 0.6303495
2 0.5869984 0.5894989
--- --- ---
95 0.4157892 0.4425857
96 0.4156102 0.4432699
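
For orientation, the call captured in this snapshot corresponds roughly to the sketch below. The `.params`, `.train`, `.test`, and `.objective` names are placeholders from embed's internal xgboost wrapper; the toy data here is purely illustrative.

```r
library(xgboost)

# Toy binary-classification data standing in for the internal .train/.test
# sets; 13 columns to match the "# of features: 13" line above.
set.seed(1)
x <- matrix(rnorm(200 * 13), ncol = 13)
y <- rbinom(200, 1, 0.5)
dtrain <- xgb.DMatrix(x[1:150, ], label = y[1:150])
dtest  <- xgb.DMatrix(x[151:200, ], label = y[151:200])

# Mirrors the `params (as set within xgb.train)` line in the snapshot.
params <- list(eta = 0.3, max_bin = 10, max_depth = 1, min_child_weight = 5)

fit <- xgb.train(
  params = params,
  data = dtrain,
  nrounds = 100,
  watchlist = list(train = dtrain, test = dtest),
  verbose = 0,
  early_stopping_rounds = 10,
  tree_method = "hist",
  objective = "binary:logistic",
  nthread = 1
)
```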

# run_xgboost for multi-classification

Code
xgboost
Output
##### xgb.Booster
raw: 149.7 Kb
call:
xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
watchlist = list(train = .train, test = .test), verbose = 0,
early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
nthread = 1)
params (as set within xgb.train):
eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", num_class = "6", tree_method = "hist", objective = "multi:softprob", nthread = "1", validate_parameters = "TRUE"
xgb.attributes:
best_iteration, best_msg, best_ntreelimit, best_score, niter
callbacks:
cb.evaluation.log()
cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
verbose = verbose)
# of features: 30
niter: 33
best_iteration : 23
best_ntreelimit : 23
best_score : 1.246428
best_msg : [23] train-mlogloss:1.178121 test-mlogloss:1.246428
nfeatures : 30
evaluation_log:
iter train_mlogloss test_mlogloss
<num> <num> <num>
1 1.623174 1.631783
2 1.515108 1.531188
--- --- ---
32 1.159813 1.249701
33 1.158088 1.250462

# run_xgboost for regression

Code
xgboost
Output
##### xgb.Booster
raw: 40.2 Kb
call:
xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
watchlist = list(train = .train, test = .test), verbose = 0,
early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
nthread = 1)
params (as set within xgb.train):
eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
xgb.attributes:
best_iteration, best_msg, best_ntreelimit, best_score, niter
callbacks:
cb.evaluation.log()
cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
verbose = verbose)
# of features: 73
niter: 50
best_iteration : 40
best_ntreelimit : 40
best_score : 0.1165337
best_msg : [40] train-rmse:0.064010 test-rmse:0.116534
nfeatures : 73
evaluation_log:
iter train_rmse test_rmse
<num> <num> <num>
1 3.31007782 3.3068878
2 2.31969213 2.3262197
--- --- ---
49 0.06207940 0.1175223
50 0.06191289 0.1188113

# xgb_binning for classification

@@ -292,6 +292,14 @@
-- Operations
* Discretizing variables using xgboost: x and z | Trained, weighted

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = sim_tr_cls[, -1])
Condition
Error in `step_discretize_xgb()`:
! The following required column is missing from `new_data`: x.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/embed.md
@@ -39,6 +39,14 @@
! Name collision occurred. The following variable names already exist:
* `x3_embed_1`

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ex_dat[, -3])
Condition
Error in `step_embed()`:
! The following required column is missing from `new_data`: x3.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/feature_hash.md
@@ -18,6 +18,14 @@
! Name collision occurred. The following variable names already exist:
* `x3_hash_01`

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ex_dat[, -3])
Condition
Error in `step_feature_hash()`:
! The following required column is missing from `new_data`: x3.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/lencode_bayes.md
@@ -101,6 +101,14 @@
-- Operations
* Linear embedding for factors via Bayesian GLM for: x3 | Trained, weighted

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ex_dat[, -3])
Condition
Error in `step_lencode_bayes()`:
! The following required column is missing from `new_data`: x3.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/lencode_glm.md
@@ -54,6 +54,14 @@
-- Operations
* Linear embedding for factors via GLM for: x3 | Trained, weighted

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ex_dat[, -3])
Condition
Error in `step_lencode_glm()`:
! The following required column is missing from `new_data`: x3.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/lencode_mixed.md
@@ -49,6 +49,14 @@
-- Operations
* Linear embedding for factors via mixed effects for: x3 | Trained, weighted

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = ex_dat[, -3])
Condition
Error in `step_lencode_mixed()`:
! The following required column is missing from `new_data`: x3.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/pca_sparse.md
@@ -26,6 +26,14 @@
! Name collision occurred. The following variable names already exist:
* `PC1`

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = tr[, -3])
Condition
Error in `step_pca_sparse()`:
! The following required column is missing from `new_data`: avg_inten_ch_1.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/pca_sparse_bayes.md
@@ -26,6 +26,14 @@
! Name collision occurred. The following variable names already exist:
* `PC1`

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = tr[, -3])
Condition
Error in `step_pca_sparse_bayes()`:
! The following required column is missing from `new_data`: avg_inten_ch_1.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/pca_truncated.md
@@ -8,6 +8,14 @@
! Name collision occurred. The following variable names already exist:
* `PC1`

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = tr[, -3])
Condition
Error in `step_pca_truncated()`:
! The following required column is missing from `new_data`: avg_inten_ch_1.

# empty printing

Code
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/woe.md
@@ -109,6 +109,14 @@
Caused by error in `dictionary()`:
! 'outcome' must have exactly 2 categories (has 3)

# bake method errors when needed non-standard role columns are missing

Code
bake(rec_trained, new_data = credit_data[, -8])
Condition
Error in `step_woe()`:
! The following required column is missing from `new_data`: Job.

# empty printing

Code