Skip to contents

Usage

fsi(
  task,
  learner,
  resampling,
  measures = NULL,
  terminator,
  store_benchmark_result = TRUE,
  store_models = FALSE,
  check_values = FALSE
)

Arguments

task

(mlr3::Task)
Task to operate on.

learner

(mlr3::Learner)
Learner to optimize the feature subset for.

resampling

(mlr3::Resampling)
Resampling that is used to evaluate the performance of the feature subsets. Uninstantiated resamplings are instantiated during construction so that all feature subsets are evaluated on the same data splits. Already instantiated resamplings are kept unchanged.

measures

(mlr3::Measure or list of mlr3::Measure)
A single measure creates a FSelectInstanceSingleCrit and multiple measures a FSelectInstanceMultiCrit. If NULL, the default measure is used.

terminator

(Terminator)
Stop criterion of the feature selection.

store_benchmark_result

(logical(1))
Store benchmark result in archive?

store_models

(logical(1))
Store models in benchmark result?

check_values

(logical(1))
Check the parameters before the evaluation and the results for validity?

Resources

Examples

# Feature selection on Palmer Penguins data set
# \donttest{

task = tsk("penguins")
learner = lrn("classif.rpart")

# Construct feature selection instance
instance = fsi(
  task = task,
  learner = learner,
  resampling = rsmp("cv", folds = 3),
  measures = msr("classif.ce"),
  terminator = trm("evals", n_evals = 4)
)

# Choose optimization algorithm
fselector = fs("random_search", batch_size = 2)

# Run feature selection
fselector$optimize(instance)
#>    bill_depth bill_length body_mass flipper_length island  sex year
#> 1:       TRUE        TRUE     FALSE           TRUE   TRUE TRUE TRUE
#>                                                 features classif.ce
#> 1: bill_depth,bill_length,flipper_length,island,sex,year   0.058149

# Subset task to optimal feature set
task$select(instance$result_feature_set)

# Train the learner with optimal feature set on the full data set
learner$train(task)

# Inspect all evaluated sets
as.data.table(instance$archive)
#>    bill_depth bill_length body_mass flipper_length island   sex  year
#> 1:       TRUE        TRUE     FALSE           TRUE   TRUE  TRUE  TRUE
#> 2:       TRUE       FALSE      TRUE          FALSE   TRUE FALSE  TRUE
#> 3:      FALSE        TRUE      TRUE           TRUE  FALSE  TRUE  TRUE
#> 4:      FALSE        TRUE     FALSE           TRUE   TRUE FALSE FALSE
#>    classif.ce runtime_learners           timestamp batch_nr warnings errors
#> 1: 0.05814900            0.039 2023-01-26 18:33:56        1        0      0
#> 2: 0.18901602            0.021 2023-01-26 18:33:56        1        0      0
#> 3: 0.06979405            0.022 2023-01-26 18:33:56        2        0      0
#> 4: 0.06107297            0.020 2023-01-26 18:33:56        2        0      0
#>         resample_result
#> 1: <ResampleResult[21]>
#> 2: <ResampleResult[21]>
#> 3: <ResampleResult[21]>
#> 4: <ResampleResult[21]>
# }