-
-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add wrapper methods #35
Changes from 6 commits
294c262
65188c9
010b08e
b43469f
2ccc85f
488e514
3cbb0cd
f0d169d
ce3d723
bac8a6f
3d56b2e
c5055d3
6763dee
0306314
c7f5f80
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,11 @@ Authors@R: | |
family = "Bischl", | ||
role = "aut", | ||
email = "[email protected]", | ||
comment = c(ORCID = "0000-0001-6002-6980"))) | ||
comment = c(ORCID = "0000-0001-6002-6980")), | ||
person(given = "Marc", | ||
family = "Becker", | ||
role = "aut", | ||
email = "[email protected]")) | ||
Description: Implements methods for feature selection and | ||
filtering in mlr3. | ||
License: MIT + file LICENSE | ||
|
@@ -57,6 +61,9 @@ NeedsCompilation: no | |
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 6.1.1 | ||
Collate: | ||
'FeatureSelection.R' | ||
'FeatureSelectionForward.R' | ||
'FeatureSelectionRandom.R' | ||
'Filter.R' | ||
'FilterAUC.R' | ||
'FilterCMIM.R' | ||
|
@@ -73,6 +80,11 @@ Collate: | |
'FilterSymmetricalUncertainty.R' | ||
'FilterVariableImportance.R' | ||
'FilterVariance.R' | ||
'PerformanceEvaluator.R' | ||
'Terminator.R' | ||
'TerminatorEvaluations.R' | ||
'TerminatorPerformanceStep.R' | ||
'TerminatorRuntime.R' | ||
'helpers.R' | ||
'mlr_filters.R' | ||
'reexports.R' | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#' @title Abstract FeatureSelection Class | ||
#' | ||
#' @description `FeatureSelection` class that implements the main functionality each fs must have. A fs is an object that describes the optimization method for choosing the features given within the `[PerformanceEvaluator]` object. | ||
#' | ||
#' @section Usage: | ||
#' ``` | ||
#' # Construction | ||
#' fs = FeatureSelection$new(id, pe, tm, settings = list()) | ||
#' | ||
#' # public members | ||
#' fs$id | ||
#' fs$pe | ||
#' fs$tm | ||
#' fs$settings | ||
#' | ||
#' # public methods | ||
#' fs$calculate() | ||
#' ``` | ||
#' @section Arguments: | ||
#' * `id` (`character(1)`):\cr | ||
#' The id of the FeatureSelection. | ||
#' * `pe` (`[PerformanceEvaluator]`). | ||
#' * `tm` (`[Terminator]`). | ||
#' * `settings` (`list`):\cr | ||
#' The settings for the FeatureSelection. | ||
#' | ||
#' @section Details: | ||
#' * `$new()` creates a new object of class `[FeatureSelection]`. | ||
#' * `$id` stores an identifier for this `[FeatureSelection]`. | ||
#' * `$pe` stores the [PerformanceEvaluator] to optimize. | ||
#' * `$tm` stores the `[Terminator]`. | ||
#' * `$settings` is a list of settings for this `[FeatureSelection]`. | ||
#' * `$calculate()` performs the feature selection until the `[Terminator]` stored in `$tm` reports that it has terminated. | ||
#' @name FeatureSelection | ||
#' @family FeatureSelection | ||
NULL | ||
|
||
#' @export | ||
FeatureSelection = R6Class("FeatureSelection",
  public = list(
    # Identifier of this feature selection strategy.
    id = NULL,
    # PerformanceEvaluator that scores candidate feature subsets.
    pe = NULL,
    # Terminator; the search loop runs while its `terminated` flag is FALSE.
    tm = NULL,
    # Uniquely named list of strategy-specific settings.
    settings = NULL,
    # Currently selected subset as a 0/1 vector aligned with
    # `pe$task$feature_names`; initial value is set by subclasses.
    state = NULL,

    # Validate the constructor arguments and store them on the object.
    initialize = function(id, pe, tm, settings = list()) {
      self$id = checkmate::assert_string(id)
      self$pe = checkmate::assert_r6(pe, "PerformanceEvaluator")
      self$tm = checkmate::assert_r6(tm, "Terminator")
      self$settings = checkmate::assert_list(settings, names = "unique")
    },

    # Run search steps until the terminator flags termination.
    calculate = function() {
      while (!self$tm$terminated) {
        private$calculate_step()
      }
    }
  ),
  private = list(
    # One search step: generate candidate states (subclasses provide
    # `generate_states()`), evaluate them, then store the best subset of the
    # most recent evaluation as the new state.
    calculate_step = function() {
      states = private$generate_states()
      named_states = lapply(states, private$binary_to_features)

      private$eval_states_terminator(named_states)

      bmr = self$pe$get_best()
      features = bmr[[length(bmr)]]$features
      # `%in%` yields a full-length 0/1 vector even when `features` is empty.
      self$state = as.numeric(self$pe$task$feature_names %in% features)
    },

    # Translate a 0/1 state vector into the corresponding feature names.
    # The names are read from the evaluator's task, not from a free variable
    # `task` that would be looked up in the calling/global environment.
    binary_to_features = function(binary_features) {
      self$pe$task$feature_names[as.logical(binary_features)]
    },

    # Evaluate the candidate feature sets, notifying the terminator before
    # and after the evaluation.
    eval_states_terminator = function(states) {
      self$tm$update_start(self$pe)
      self$pe$eval_states(states)
      self$tm$update_end(self$pe)
    }
  )
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#' @title FeatureSelectionForward | ||
#' | ||
#' @description | ||
#' FeatureSelection child class to conduct forward search. | ||
#' | ||
#' @section Usage: | ||
#' ``` | ||
#' fs = FeatureSelectionForward$new() | ||
#' ``` | ||
#' See [FeatureSelection] for a description of the interface. | ||
#' | ||
#' @section Arguments: | ||
#' * `pe` (`[PerformanceEvaluator]`). | ||
#' * `tm` (`[Terminator]`). | ||
#' | ||
#' @section Details: | ||
#' `$new()` creates a new object of class [FeatureSelectionForward]. | ||
#' `$get_result()` Returns selected features in each step. | ||
#' The interface is described in [FeatureSelection]. | ||
#' | ||
#' Each step is possibly executed in parallel via [mlr3::benchmark()] | ||
#' | ||
#' @name FeatureSelectionForward | ||
#' @family FeatureSelection | ||
#' @examples | ||
#' task = mlr3::mlr_tasks$get("pima") | ||
#' measures = mlr3::mlr_measures$mget(c("classif.acc")) | ||
#' task$measures = measures | ||
#' learner = mlr3::mlr_learners$get("classif.rpart") | ||
#' resampling = mlr3::mlr_resamplings$get("cv", param_vals = list(folds = 5L)) | ||
#' pe = PerformanceEvaluator$new(task, learner, resampling) | ||
#' tm = TerminatorPerformanceStep$new(threshold = 0.01) | ||
#' fs = FeatureSelectionForward$new(pe, tm) | ||
#' fs$calculate() | ||
#' fs$get_result() | ||
NULL | ||
|
||
#' @export | ||
#' @include FeatureSelection.R | ||
|
||
FeatureSelectionForward = R6Class("FeatureSelectionForward",
  inherit = FeatureSelection,
  public = list(
    # Create a forward search. `max_features` defaults to the number of
    # features of the evaluator's task when left as NA.
    initialize = function(pe, tm, max_features = NA) {
      # NOTE(review): a reviewer suggested that the settings should instead be
      # passed on as a parameter set, roughly
      #   super$initialize(id = id, [...],
      #     param_set = ParamSet$new(list([..])), param_vals = param_vals)
      if (is.na(max_features)) {
        max_features = length(pe$task$feature_names)
      }

      super$initialize(id = "forward_selection", pe = pe, tm = tm,
        settings = list(max_features = checkmate::assert_numeric(max_features,
          lower = 1,
          upper = length(pe$task$feature_names))))

      # Forward search starts with no feature selected.
      self$state = rep(0, length(pe$task$feature_names))
    },

    # Feature names and aggregated performance of the result that
    # `$get_best()` picks from the most recent step, using the task's first
    # measure.
    get_result = function() {
      bmr = self$pe$bmr[[length(self$pe$bmr)]]$get_best(self$pe$task$measures[[1L]]$id)
      list(features = bmr$task$feature_names,
        performance = bmr$aggregated)
    }
  ),
  private = list(
    # One candidate per currently unselected feature: the current state with
    # that single feature switched on.
    generate_states = function() {
      lapply(which(self$state == 0), function(i) {
        candidate = self$state
        candidate[i] = 1
        candidate
      })
    },

    # Evaluate the candidates exactly as the parent class does, then stop the
    # search once the candidate size has reached `max_features`.
    eval_states_terminator = function(states) {
      super$eval_states_terminator(states)

      # Side-effect stop maximum features. `>=` instead of `==` so that a
      # non-integer limit still stops the search.
      if (!self$tm$terminated) {
        self$tm$terminated = (length(states[[1]]) >= self$settings$max_features)
      }
    }
  )
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#' @title FeatureSelectionRandom | ||
#' | ||
#' @description | ||
#' FeatureSelection child class to conduct random search | ||
#' | ||
#' @section Usage: | ||
#' ``` | ||
#' fs = FeatureSelectionRandom$new() | ||
#' ``` | ||
#' See [FeatureSelection] for a description of the interface. | ||
#' | ||
#' @section Arguments: | ||
#' * `pe` (`[PerformanceEvaluator]`). | ||
#' * `tm` (`[Terminator]`). | ||
#' * `max_features` (`integer(1)`) | ||
#' Maximum number of features | ||
#' * `batch_size` (`integer(1)`): | ||
#' Maximum number of feature combinations to try in a batch. | ||
#' Each batch is possibly executed in parallel via [mlr3::benchmark()]. | ||
#' | ||
#' @section Details: | ||
#' `$new()` creates a new object of class [FeatureSelectionRandom]. | ||
#' `$get_result()` Returns best feature combination. | ||
#' The interface is described in [FeatureSelection]. | ||
#' | ||
#' @name FeatureSelectionRandom | ||
#' @family FeatureSelection | ||
#' @examples | ||
#' task = mlr3::mlr_tasks$get("boston_housing") | ||
#' learner = mlr3::mlr_learners$get("regr.rpart") | ||
#' resampling = mlr3::mlr_resamplings$get("cv", param_vals = list(folds = 5L)) | ||
#' pe = PerformanceEvaluator$new(task = task,learner = learner, resampling = resampling) | ||
#' tm = TerminatorEvaluations$new(max_evaluations = 20) | ||
#' fs = FeatureSelectionRandom$new(pe, tm, batch_size = 10, max_features = 8) | ||
#' fs$calculate() | ||
#' fs$get_result() | ||
NULL | ||
|
||
#' @export | ||
#' @include FeatureSelection.R | ||
|
||
FeatureSelectionRandom = R6Class("FeatureSelectionRandom",
  inherit = FeatureSelection,
  public = list(
    # Create a random search. `max_features = NA` means no limit on the size
    # of the sampled subsets; `batch_size` is the number of subsets per step.
    initialize = function(pe, tm, max_features = NA, batch_size = 10) {
      super$initialize(id = "random_selection", pe = pe, tm = tm,
        settings = list(
          max_features = checkmate::assert_numeric(max_features,
            lower = 1,
            upper = length(pe$task$feature_names)),
          batch_size = checkmate::assert_numeric(batch_size)))
    },

    # Feature names and aggregated performance of the result that
    # `$get_best()` picks across all evaluated batches, using the task's
    # first measure.
    get_result = function() {
      results = self$pe$bmr
      # Combine on a clone so the stored first batch is not modified in case
      # `$combine()` works in place, and so repeated calls do not keep
      # appending the same batches to it.
      combined = results[[1L]]$clone(deep = TRUE)
      for (other in results[-1L]) {
        combined = combined$combine(other)
      }
      bmr_best = combined$get_best(self$pe$task$measures[[1L]]$id)
      list(features = bmr_best$task$feature_names,
        performance = bmr_best$aggregated)
    }
  ),
  private = list(
    # Draw `batch_size` random 0/1 states. Each feature is included with
    # probability 0.5; draws are repeated until the subset is non-empty and
    # has at most `max_features` features (the limit is inclusive).
    generate_states = function() {
      n_features = length(self$pe$task$feature_names)
      limit = self$settings$max_features
      if (is.na(limit)) {
        limit = n_features
      }
      # A task without features can only yield the empty subset.
      min_selected = min(1, n_features)

      lapply(seq_len(self$settings$batch_size), function(i) {
        repeat {
          candidate = rbinom(n_features, 1, 0.5)
          n_selected = sum(candidate)
          if (n_selected >= min_selected && n_selected <= limit) {
            return(candidate)
          }
        }
      })
    }
  )
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#' @title Abstract PerformanceEvaluator Class | ||
#' | ||
#' @description | ||
#' `PerformanceEvaluator` class that implements the performance evaluation on a set of feature combinations. A pe is an object that stores all information that is necessary to conduct a feature selection (`mlr3::Task`, `mlr3::Learner`, `mlr3::Resampling`). | ||
#' | ||
#' @section Usage: | ||
#' ``` | ||
#' # Construction | ||
#' pe = PerformanceEvaluator$new() | ||
#' | ||
#' # Public members | ||
#' pe$task | ||
#' pe$learner | ||
#' pe$resampling | ||
#' pe$bmr | ||
#' | ||
#' # Public methods | ||
#' pe$eval_states(states) | ||
#' pe$get_best() | ||
#' ``` | ||
#' | ||
#' @section Arguments: | ||
#' * `task` (`mlr3::Task`): | ||
#' The task that we want to evaluate. | ||
#' * `learner` (`mlr3::Learner`): | ||
#' The learner that we want to evaluate. | ||
#' * `resampling` (`mlr3::Resampling`): | ||
#' The Resampling method that is used to evaluate the learner. | ||
#' | ||
#' @section Details: | ||
#' * `$new()` creates a new object of class [PerformanceEvaluator]. | ||
#' * `$task` (`mlr3::Task`) the task for which the feature selection should be conducted. | ||
#' * `$learner` (`mlr3::Learner`) the algorithm for which the feature selection should be conducted. | ||
#' * `$resampling` (`mlr3::Resampling`) strategy to evaluate a feature combination | ||
#' * `$bmr` (`list`) of (`mlr3::BenchmarkResult`) objects. Each entry corresponds to one batch or step, depending on the feature selection method used. | ||
#' * `$eval_states(states)` evaluates the feature combinations `states`. | ||
#' * `$get_best()` returns selected features with the best performance of each entry in `$bmr`. | ||
#' | ||
#' @name PerformanceEvaluator | ||
#' @keywords internal | ||
#' @family PerformanceEvaluator | ||
#' @examples | ||
#' task = mlr3::mlr_tasks$get("iris") | ||
#' learner = mlr3::mlr_learners$get("classif.rpart") | ||
#' resampling = mlr3::mlr_resamplings$get("holdout") | ||
#' pe = PerformanceEvaluator$new(task, learner, resampling) | ||
NULL | ||
|
||
#' @export | ||
PerformanceEvaluator = R6Class("PerformanceEvaluator",
  public = list(
    # Task whose features are being selected.
    task = NULL,
    # Learner that is resampled on each feature subset.
    learner = NULL,
    # Resampling used for every evaluation.
    resampling = NULL,
    # One benchmark result per call to `eval_states()`.
    bmr = list(),
    # The `states` argument of every `eval_states()` call, in call order.
    states = list(),

    # Check the three components and store them.
    initialize = function(task, learner, resampling) {
      self$task = mlr3::assert_task(task)
      self$learner = mlr3::assert_learner(learner, task = task)
      self$resampling = mlr3::assert_resampling(resampling)
    },

    # Benchmark the learner on one task clone per entry of `states`, each
    # clone restricted to that entry's features, and append the outcome to
    # `$bmr`.
    eval_states = function(states) {
      self$states[[length(self$states) + 1]] = states

      # Build the restricted task clones; names of `states` are dropped.
      restricted_tasks = unname(lapply(states, function(feature_subset) {
        subset_task = self$task$clone()
        subset_task$select(feature_subset)
        subset_task
      }))

      design = data.table::data.table(task = restricted_tasks,
        learner = list(self$learner),
        resampling = list(self$resampling))
      batch_result = benchmark(design)

      self$bmr[[length(self$bmr) + 1]] = batch_result
    },

    # For every stored benchmark result: the features and the mean
    # performance of the result `$get_best()` picks for the task's first
    # measure.
    get_best = function() {
      lapply(self$bmr, function(batch) {
        measure_id = self$task$measures[[1L]]$id
        winner = batch$get_best(measure_id)
        list(features = winner$task$feature_names,
          performance = mean(winner$performance(self$task$measures[[1L]]$id)))
      })
    }
  )
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.