"vscode:/vscode.git/clone" did not exist on "24f1f9cddf163775f9d484a04ca46ceca31615a3"
test_parameters.R 2.84 KB
Newer Older
1
2
3

# Test group: effect of the `feature_penalty` parameter.
context("feature penalties")

# Load the agaricus example datasets from the lightgbm package and bind them
# to short names; `train` is referenced by the tests below.
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test

test_that("Feature penalties work properly", {
  # Fit a series of models with varying penalty on most important variable
  var_name <- "odor=none"
  var_index <- which(train$data@Dimnames[[2L]] == var_name)

  # One model per penalty value, stepping from 1.0 down to 0.0 by 0.1.
  # All other features keep a penalty of 1.0.
  bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
    feature_penalties <- rep(1.0, ncol(train$data))
    feature_penalties[var_index] <- x
    lightgbm(
      data = train$data
      , label = train$label
      , num_leaves = 5L
      , learning_rate = 0.05
      , nrounds = 20L
      , objective = "binary"
      # the penalties are passed as a single comma-separated string
      , feature_penalty = paste0(feature_penalties, collapse = ",")
      , metric = "binary_error"
      , verbose = -1L
    )
  })

  # Compute each model's importance table once and reuse it for all three
  # statistics instead of calling lgb.importance() three times per model.
  importance <- lapply(bst, lgb.importance)
  var_gain <- lapply(importance, function(imp) imp[Feature == var_name, Gain])
  var_cover <- lapply(importance, function(imp) imp[Feature == var_name, Cover])
  var_freq <- lapply(importance, function(imp) imp[Feature == var_name, Frequency])

  # Ensure that feature gain, cover, and frequency decreases with stronger penalties
  expect_true(all(diff(unlist(var_gain)) <= 0.0))
  expect_true(all(diff(unlist(var_cover)) <= 0.0))
  expect_true(all(diff(unlist(var_freq)) <= 0.0))

  # ... and that at least one step is a strict decrease, so the series is
  # not simply constant
  expect_lt(min(diff(unlist(var_gain))), 0.0)
  expect_lt(min(diff(unlist(var_cover))), 0.0)
  expect_lt(min(diff(unlist(var_freq))), 0.0)

  # Ensure that feature is not used when feature_penalty = 0
  expect_length(var_gain[[length(var_gain)]], 0L)
})
46

47
48
49
# Test group: parameter alias lookup and alias-aware behavior.
context("parameter aliases")

test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
  param_aliases <- .PARAMETER_ALIASES()

  # The result is a list with character names
  expect_true(is.list(param_aliases))
  expect_true(is.character(names(param_aliases)))

  # Spot-check a few parameters
  expect_true(is.character(param_aliases[["boosting"]]))
  expect_true(is.character(param_aliases[["early_stopping_round"]]))
  expect_true(is.character(param_aliases[["num_iterations"]]))
  expect_true(is.character(param_aliases[["pre_partition"]]))

  # Every element has a name, every element is a character vector,
  # and no name is repeated
  expect_length(names(param_aliases), length(param_aliases))
  # vapply() always yields a logical vector, even for an empty list
  expect_true(all(vapply(param_aliases, is.character, logical(1L))))
  expect_length(unique(names(param_aliases)), length(param_aliases))
})

62
test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
  # Repeat the check for every name listed under "boosting" in the alias table
  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
    expect_warning({
      result <- lightgbm(
        data = train$data
        , label = train$label
        , num_leaves = 5L
        , learning_rate = 0.05
        , nrounds = 5L
        , objective = "binary"
        , metric = "binary_error"
        , verbose = -1L
        # builds a one-element named vector whose name is the current
        # alias and whose value is "dart"
        , params = stats::setNames(
          object = "dart"
          , nm = boosting_param
        )
      )
    }, regexp = "Early stopping is not available in 'dart' mode")
  }
})