# test_parameters.R
# Shared fixtures: the agaricus (mushroom) datasets bundled with lightgbm
data(agaricus.train, agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test

test_that("Feature penalties work properly", {
  # Fit a series of models with a varying penalty on the most important
  # variable: penalty factors run from 1 (no penalty) down to 0 (feature
  # fully disabled).
  var_name <- "odor=none"
  var_index <- which(train$data@Dimnames[[2]] == var_name)

  bst <- lapply(seq(1, 0, by = -0.1), function(penalty) {
    feature_penalties <- rep(1, ncol(train$data))
    feature_penalties[var_index] <- penalty
    lightgbm(
      data = train$data,
      label = train$label,
      num_leaves = 5,
      learning_rate = 0.05,
      nrounds = 20,
      objective = "binary",
      feature_penalty = paste0(feature_penalties, collapse = ","),
      metric = "binary_error",
      verbose = -1
    )
  })

  # Compute the importance table once per model, then extract each measure
  # for the penalized feature. Each extraction yields a length-0 vector when
  # the feature was never used, so these stay lists (not unlisted vectors)
  # to keep those empty elements addressable below.
  importance <- lapply(bst, function(model) {
    lgb.importance(model)[Feature == var_name]
  })
  var_gain <- lapply(importance, function(imp) imp[, Gain])
  var_cover <- lapply(importance, function(imp) imp[, Cover])
  var_freq <- lapply(importance, function(imp) imp[, Frequency])

  # Ensure that feature gain, cover, and frequency decrease (monotonically
  # non-increasing) as the penalty gets stronger
  expect_true(all(diff(unlist(var_gain)) <= 0))
  expect_true(all(diff(unlist(var_cover)) <= 0))
  expect_true(all(diff(unlist(var_freq)) <= 0))

  # ... and that at least one strict decrease occurs, so the penalty
  # demonstrably has an effect
  expect_lt(min(diff(unlist(var_gain))), 0)
  expect_lt(min(diff(unlist(var_cover))), 0)
  expect_lt(min(diff(unlist(var_freq))), 0)

  # Ensure that the feature is not used at all when feature_penalty = 0
  expect_length(var_gain[[length(var_gain)]], 0)
})