Add wrapper methods #35

Closed
wants to merge 15 commits into from
Changes from 6 commits
14 changes: 13 additions & 1 deletion DESCRIPTION
@@ -16,7 +16,11 @@ Authors@R:
family = "Bischl",
role = "aut",
email = "[email protected]",
comment = c(ORCID = "0000-0001-6002-6980")))
comment = c(ORCID = "0000-0001-6002-6980")),
person(given = "Marc",
family = "Becker",
role = "aut",
email = "[email protected]"))
Description: Implements methods for feature selection and
filtering in mlr3.
License: MIT + file LICENSE
@@ -57,6 +61,9 @@ NeedsCompilation: no
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.1
Collate:
'FeatureSelection.R'
'FeatureSelectionForward.R'
'FeatureSelectionRandom.R'
'Filter.R'
'FilterAUC.R'
'FilterCMIM.R'
@@ -73,6 +80,11 @@ Collate:
'FilterSymmetricalUncertainty.R'
'FilterVariableImportance.R'
'FilterVariance.R'
'PerformanceEvaluator.R'
'Terminator.R'
'TerminatorEvaluations.R'
'TerminatorPerformanceStep.R'
'TerminatorRuntime.R'
'helpers.R'
'mlr_filters.R'
'reexports.R'
8 changes: 8 additions & 0 deletions NAMESPACE
@@ -2,6 +2,9 @@

S3method(as.data.table,DictionaryFilter)
S3method(as.data.table,Filter)
export(FeatureSelection)
export(FeatureSelectionForward)
export(FeatureSelectionRandom)
export(Filter)
export(FilterAUC)
export(FilterCMIM)
@@ -18,6 +21,11 @@ export(FilterRankCorrelation)
export(FilterSymmetricalUncertainty)
export(FilterVariableImportance)
export(FilterVariance)
export(PerformanceEvaluator)
export(Terminator)
export(TerminatorEvaluations)
export(TerminatorPerformanceStep)
export(TerminatorRuntime)
export(as.data.table)
export(mlr_filters)
import(checkmate)
80 changes: 80 additions & 0 deletions R/FeatureSelection.R
@@ -0,0 +1,80 @@
#' @title Abstract FeatureSelection Class
#'
#' @description Abstract `FeatureSelection` class that implements the main functionality every feature selection method must provide. A `FeatureSelection` object describes the optimization method used to choose a feature subset of the task stored in the `[PerformanceEvaluator]` object.
#'
#' @section Usage:
#' ```
#' # Construction
#' fs = FeatureSelection$new(id, pe, tm, settings = list())
#'
#' # public members
#' fs$id
#' fs$pe
#' fs$tm
#' fs$settings
#'
#' # public methods
#' fs$calculate()
#' ```
#' @section Arguments:
#' * `id` (`character(1)`):\cr
#' The id of the FeatureSelection.
#' * `pe` (`[PerformanceEvaluator]`).
#' * `tm` (`[Terminator]`).
#' * `settings` (`list`):\cr
#' The settings for the FeatureSelection.
#'
#' @section Details:
#' * `$new()` creates a new object of class `[FeatureSelection]`.
#' * `$id` stores an identifier for this `[FeatureSelection]`.
#' * `$pe` stores the [PerformanceEvaluator] to optimize.
#' * `$tm` stores the `[Terminator]`.
#' * `$settings` is a list of settings for this `[FeatureSelection]`.
#' * `$calculate()` performs the feature selection, evaluating candidate feature sets with the `[PerformanceEvaluator]` until the budget of the `[Terminator]` is exhausted.
#' @name FeatureSelection
#' @family FeatureSelection
NULL

#' @export
FeatureSelection = R6Class("FeatureSelection",
public = list(
id = NULL,
pe = NULL,
tm = NULL,
settings = NULL,
state = NULL,

initialize = function(id, pe, tm, settings = list()) {
self$id = checkmate::assert_string(id)
self$pe = checkmate::assert_r6(pe, "PerformanceEvaluator")
self$tm = checkmate::assert_r6(tm, "Terminator")
self$settings = checkmate::assert_list(settings, names = "unique")
},

calculate = function() {
while(!self$tm$terminated) {
private$calculate_step()
}
}
),
private = list(
calculate_step = function() {
states = private$generate_states()
named_states = lapply(states, private$binary_to_features)

private$eval_states_terminator(named_states)

bmr = self$pe$get_best()
features = bmr[[length(bmr)]]$features
self$state = as.numeric(Reduce("|", lapply(features, function(x) x == self$pe$task$feature_names)))
},
binary_to_features = function(binary_features) {
self$pe$task$feature_names[as.logical(binary_features)]
},
eval_states_terminator = function(states) {
self$tm$update_start(self$pe)
self$pe$eval_states(states)
self$tm$update_end(self$pe)
}
)
)
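The abstract class above leaves `generate_states()` to its subclasses: `calculate_step()` expects a list of binary vectors, one position per feature of the task, which it translates to feature names and hands to the `PerformanceEvaluator`. A minimal sketch of a hypothetical subclass fulfilling that contract, assuming the `FeatureSelection` class from this PR is available (the class name and the exhaustive grid are illustrative assumptions, not part of this PR):

library(R6)

# Hypothetical sketch: enumerates every non-empty feature combination.
FeatureSelectionExhaustiveSketch = R6Class("FeatureSelectionExhaustiveSketch",
  inherit = FeatureSelection,
  public = list(
    initialize = function(pe, tm) {
      super$initialize(id = "exhaustive_sketch", pe = pe, tm = tm)
    }
  ),
  private = list(
    generate_states = function() {
      n = length(self$pe$task$feature_names)
      # all 0/1 combinations over the task's features, empty set removed
      grid = expand.grid(rep(list(c(0, 1)), n))
      states = lapply(seq_len(nrow(grid)), function(i) unlist(grid[i, ], use.names = FALSE))
      states[vapply(states, function(x) sum(x) > 0, logical(1))]
    }
  )
)

Like the subclasses in this PR, such a class relies entirely on the `Terminator` to stop the `calculate()` loop.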
86 changes: 86 additions & 0 deletions R/FeatureSelectionForward.R
@@ -0,0 +1,86 @@
#' @title FeatureSelectionForward
#'
#' @description
#' FeatureSelection child class to conduct forward search.
#'
#' @section Usage:
#' ```
#' fs = FeatureSelectionForward$new(pe, tm, max_features = NA)
#' ```
#' See [FeatureSelection] for a description of the interface.
#'
#' @section Arguments:
#' * `pe` (`[PerformanceEvaluator]`).
#' * `tm` (`[Terminator]`).
#' * `max_features` (`integer(1)`):
#'   Maximum number of features.
#'
#' @section Details:
#' `$new()` creates a new object of class [FeatureSelectionForward].
#' `$get_result()` returns the selected features of each step.
#' The interface is described in [FeatureSelection].
#'
#' Each step is possibly executed in parallel via [mlr3::benchmark()].
#'
#' @name FeatureSelectionForward
#' @family FeatureSelection
#' @examples
#' task = mlr3::mlr_tasks$get("pima")
#' measures = mlr3::mlr_measures$mget(c("classif.acc"))
Member (review comment): Suggested change
#' measures = mlr3::mlr_measures$mget(c("classif.acc"))
#' measures = mlr3::mlr_measures$get(c("classif.acc"))

#' task$measures = measures
#' learner = mlr3::mlr_learners$get("classif.rpart")
#' resampling = mlr3::mlr_resamplings$get("cv", param_vals = list(folds = 5L))
#' pe = PerformanceEvaluator$new(task, learner, resampling)
#' tm = TerminatorPerformanceStep$new(threshold = 0.01)
#' fs = FeatureSelectionForward$new(pe, tm)
#' fs$calculate()
#' fs$get_result()
NULL

#' @export
#' @include FeatureSelection.R

FeatureSelectionForward = R6Class("FeatureSelectionForward",
inherit = FeatureSelection,
public = list(
initialize = function(pe, tm, max_features = NA) {
if(is.na(max_features)) {
max_features = length(pe$task$feature_names)
Member (review comment): Something like this should go here:

      super$initialize(
        id = id,
        [...]
        param_set = ParamSet$new(list([..])),
        param_vals = param_vals
      )

}

super$initialize(id = "forward_selection", pe = pe, tm = tm,
settings = list(max_features = checkmate::assert_numeric(max_features,
Member (review comment): Instead of settings we should use param_set here inheriting from paradox. Similar to how it is done in the Filter class.

Member (review comment): self$param_set = assert_param_set(param_set)

lower = 1,
upper = length(pe$task$feature_names))))

self$state = rep(0, length(pe$task$feature_names))
},

get_result = function() {
bmr = self$pe$bmr[[length(self$pe$bmr)]]$get_best(self$pe$task$measures[[1L]]$id)
list(features = bmr$task$feature_names,
performance = bmr$aggregated)
}
),
private = list(
generate_states = function() {
new_states = list()
for (i in seq_along(self$state)) {
if (self$state[i] == 0) {
state = self$state
state[i] = 1
new_states[[length(new_states) + 1]] = state
}
}
new_states
},
eval_states_terminator = function(states) {
self$tm$update_start(self$pe)
self$pe$eval_states(states)
self$tm$update_end(self$pe)

# Side effect: stop once the maximum number of features is reached
if(!self$tm$terminated) {
self$tm$terminated = (length(states[[1]]) == self$settings$max_features)
}
}
)
)
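Following up on the review comments above, a hedged sketch of how the constructor could look once `settings` is replaced by a paradox `ParamSet` (the class name, the `param_vals` argument, and the exact shape of `super$initialize()` after such a refactoring are assumptions, not part of this PR):

library(R6)
library(paradox)

FeatureSelectionForwardParamSketch = R6Class("FeatureSelectionForwardParamSketch",
  inherit = FeatureSelection,
  public = list(
    param_set = NULL,

    initialize = function(pe, tm, param_vals = list()) {
      n = length(pe$task$feature_names)
      # describe max_features via paradox instead of an ad-hoc settings list
      self$param_set = ParamSet$new(list(
        ParamInt$new("max_features", lower = 1L, upper = n, default = n)
      ))
      self$param_set$values = param_vals
      super$initialize(id = "forward_selection", pe = pe, tm = tm)
      self$state = rep(0, n)
    }
  )
)

The same pattern would presumably apply to FeatureSelectionRandom, with an additional ParamInt for batch_size.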
79 changes: 79 additions & 0 deletions R/FeatureSelectionRandom.R
@@ -0,0 +1,79 @@
#' @title FeatureSelectionRandom
#'
#' @description
#' FeatureSelection child class to conduct random search.
#'
#' @section Usage:
#' ```
#' fs = FeatureSelectionRandom$new(pe, tm, max_features = NA, batch_size = 10)
#' ```
#' See [FeatureSelection] for a description of the interface.
#'
#' @section Arguments:
#' * `pe` (`[PerformanceEvaluator]`).
#' * `tm` (`[Terminator]`).
#' * `max_features` (`integer(1)`):
#'   Maximum number of features.
#' * `batch_size` (`integer(1)`):
#' Maximum number of feature combinations to try in a batch.
#' Each batch is possibly executed in parallel via [mlr3::benchmark()].
#'
#' @section Details:
#' `$new()` creates a new object of class [FeatureSelectionRandom].
#' `$get_result()` returns the best feature combination.
#' The interface is described in [FeatureSelection].
#'
#' @name FeatureSelectionRandom
#' @family FeatureSelection
#' @examples
#' task = mlr3::mlr_tasks$get("boston_housing")
#' learner = mlr3::mlr_learners$get("regr.rpart")
#' resampling = mlr3::mlr_resamplings$get("cv", param_vals = list(folds = 5L))
#' pe = PerformanceEvaluator$new(task = task, learner = learner, resampling = resampling)
#' tm = TerminatorEvaluations$new(max_evaluations = 20)
#' fs = FeatureSelectionRandom$new(pe, tm, batch_size = 10, max_features = 8)
#' fs$calculate()
#' fs$get_result()
NULL

#' @export
#' @include FeatureSelection.R

FeatureSelectionRandom = R6Class("FeatureSelectionRandom",
inherit = FeatureSelection,
public = list(
initialize = function(pe, tm, max_features = NA, batch_size = 10) {
super$initialize(id = "random_selection", pe = pe, tm = tm,
settings = list(max_features = checkmate::assert_numeric(max_features,
Member (review comment): paradox::ParamSet()

lower = 1,
upper = length(pe$task$feature_names)),
batch_size = checkmate::assert_numeric(batch_size)))
},

get_result = function() {
if(length(self$pe$bmr) > 1) {
bmr = lapply(self$pe$bmr[1:length(self$pe$bmr)], function(bmr) self$pe$bmr[[1]]$combine(bmr))
} else {
bmr = self$pe$bmr
}
bmr_best = bmr[[length(bmr)]]$get_best(self$pe$task$measures[[1L]]$id)
list(features = bmr_best$task$feature_names,
performance = bmr_best$aggregated)
}
),
private = list(
generate_states = function() {
lapply(seq_len(self$settings$batch_size), function(i) {
if(is.na(self$settings$max_features)) {
return(rbinom(length(self$pe$task$feature_names), 1, 0.5))
}
x = Inf
while (sum(x) > self$settings$max_features) {
x = rbinom(length(self$pe$task$feature_names), 1, 0.5)
}
return(x)
}
)
}
)
)
89 changes: 89 additions & 0 deletions R/PerformanceEvaluator.R
@@ -0,0 +1,89 @@
#' @title Abstract PerformanceEvaluator Class
#'
#' @description
#' `PerformanceEvaluator` class that implements the performance evaluation of a set of feature combinations. A `PerformanceEvaluator` object stores all information that is necessary to conduct a feature selection (`mlr3::Task`, `mlr3::Learner`, `mlr3::Resampling`).
#'
#' @section Usage:
#' ```
#' # Construction
#' pe = PerformanceEvaluator$new(task, learner, resampling)
#'
#' # Public members
#' pe$task
#' pe$learner
#' pe$resampling
#' pe$bmr
#'
#' # Public methods
#' pe$eval_states(states)
#' pe$get_best()
#' ```
#'
#' @section Arguments:
#' * `task` (`mlr3::Task`):
#' The task that we want to evaluate.
#' * `learner` (`mlr3::Learner`):
#' The learner that we want to evaluate.
#' * `resampling` (`mlr3::Resampling`):
#' The Resampling method that is used to evaluate the learner.
#'
#' @section Details:
#' * `$new()` creates a new object of class [PerformanceEvaluator].
#' * `$task` (`mlr3::Task`) the task for which the feature selection should be conducted.
#' * `$learner` (`mlr3::Learner`) the algorithm for which the feature selection should be conducted.
#' * `$resampling` (`mlr3::Resampling`) strategy to evaluate a feature combination.
#' * `$bmr` (`list`) of `mlr3::BenchmarkResult` objects. Each entry corresponds to one batch or step, depending on the feature selection method used.
#' * `$eval_states(states)` evaluates the feature combinations `states`.
#' * `$get_best()` returns selected features with the best performance of each entry in `$bmr`.
#'
#' @name PerformanceEvaluator
#' @keywords internal
#' @family PerformanceEvaluator
#' @examples
#' task = mlr3::mlr_tasks$get("iris")
#' learner = mlr3::mlr_learners$get("classif.rpart")
#' resampling = mlr3::mlr_resamplings$get("holdout")
#' pe = PerformanceEvaluator$new(task, learner, resampling)
NULL

#' @export
PerformanceEvaluator = R6Class("PerformanceEvaluator",
public = list(
task = NULL,
learner = NULL,
resampling = NULL,
bmr = list(),
states = list(),

initialize = function(task, learner, resampling) {
self$task = mlr3::assert_task(task)
self$learner = mlr3::assert_learner(learner, task = task)
self$resampling = mlr3::assert_resampling(resampling)
},

eval_states = function(states) {
self$states[[length(self$states) + 1]] = states
# For each state, clone task and set feature subset
task_list = list()
for (state in states) {
task = self$task$clone()
task$select(state)
task_list[[length(task_list) + 1]] = task
}

new_bmr = benchmark(data.table::data.table(task = task_list,
learner = list(self$learner),
resampling = list(self$resampling)))

self$bmr[[length(self$bmr) + 1]] = new_bmr
},

get_best = function() {
lapply(self$bmr, function(x) {
rr = x$get_best(self$task$measures[[1L]]$id)
list(features = rr$task$feature_names,
performance = mean(rr$performance(self$task$measures[[1L]]$id)))
})
}
)
)
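The roxygen example above stops after construction; here is a short usage sketch of `$eval_states()` and `$get_best()` (the two iris feature subsets are arbitrary, and the sketch assumes the mlr3 API this PR is written against):

library(mlr3)

task = mlr_tasks$get("iris")
learner = mlr_learners$get("classif.rpart")
resampling = mlr_resamplings$get("holdout")
pe = PerformanceEvaluator$new(task, learner, resampling)

# evaluate two candidate feature subsets as one batch
pe$eval_states(list(
  c("Petal.Length", "Petal.Width"),
  c("Sepal.Length", "Sepal.Width")
))

length(pe$bmr)  # one BenchmarkResult per call to eval_states()
pe$get_best()   # best feature subset and aggregated performance per batch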