# test_parameters.R
# Load the agaricus train/test datasets from the lightgbm package.
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")

# Short aliases for the loaded datasets.
train <- agaricus.train
test <- agaricus.test

test_that("Feature penalties work properly", {
  # Fit a series of models with varying penalty on most important variable
  var_name <- "odor=none"
  var_index <- which(train$data@Dimnames[[2L]] == var_name)

  # One model per penalty value for `var_name`, stepping from 1.0 down to 0.0;
  # every other feature keeps a penalty value of 1.0.
  bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
    feature_penalties <- rep(1.0, ncol(train$data))
    feature_penalties[var_index] <- x
    lightgbm(
      data = train$data
      , label = train$label
      , params = list(
        num_leaves = 5L
        , learning_rate = 0.05
        , objective = "binary"
        , feature_penalty = paste0(feature_penalties, collapse = ",")
        , metric = "binary_error"
      )
      , nrounds = 5L
      , verbose = -1L
    )
  })

  # Compute the importance table once per model and keep only the row(s) for
  # `var_name`; the subset is empty when the feature does not appear in it.
  var_importance <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, ])
  var_gain <- lapply(var_importance, function(imp) imp[["Gain"]])
  var_cover <- lapply(var_importance, function(imp) imp[["Cover"]])
  var_freq <- lapply(var_importance, function(imp) imp[["Frequency"]])

  # Ensure that feature gain, cover, and frequency decreases with stronger penalties
  expect_true(all(diff(unlist(var_gain)) <= 0.0))
  expect_true(all(diff(unlist(var_cover)) <= 0.0))
  expect_true(all(diff(unlist(var_freq)) <= 0.0))

  # ... and that at least one step is a strict decrease
  expect_lt(min(diff(unlist(var_gain))), 0.0)
  expect_lt(min(diff(unlist(var_cover))), 0.0)
  expect_lt(min(diff(unlist(var_freq))), 0.0)

  # Ensure that feature is not used when feature_penalty = 0
  expect_length(var_gain[[length(var_gain)]], 0L)
})
45

46
test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
  param_aliases <- .PARAMETER_ALIASES()

  # the result is a plain list with more than 100 entries
  expect_identical(class(param_aliases), "list")
  expect_gt(length(param_aliases), 100L)

  # spot-check a few well-known parameters
  expect_true(is.character(names(param_aliases)))
  expect_true(is.character(param_aliases[["boosting"]]))
  expect_true(is.character(param_aliases[["early_stopping_round"]]))
  expect_true(is.character(param_aliases[["num_iterations"]]))
  expect_true(is.character(param_aliases[["pre_partition"]]))

  # every element is named, is a character vector, and names are unique
  expect_true(length(names(param_aliases)) == length(param_aliases))
  # vapply() (unlike sapply()) always yields a logical vector, even for empty input
  expect_true(all(vapply(param_aliases, is.character, logical(1L))))
  expect_true(length(unique(names(param_aliases))) == length(param_aliases))

  # exact alias sets for two parameters
  expect_equal(sort(param_aliases[["task"]]), c("task", "task_type"))
  expect_equal(param_aliases[["bagging_fraction"]], c("bagging_fraction", "bagging", "sub_row", "subsample"))
})

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
test_that(".PARAMETER_ALIASES() uses the internal session cache", {

  cache_key <- "PARAMETER_ALIASES"

  # clear cache, so this test isn't reliant on the order unit tests are run in
  # (inherits = FALSE: only look in the cache environment itself, not in its
  # enclosing environments, which matches what rm(envir = ) operates on)
  if (exists(cache_key, envir = .lgb_session_cache_env, inherits = FALSE)) {
    rm(list = cache_key, envir = .lgb_session_cache_env)
  }
  expect_false(exists(cache_key, envir = .lgb_session_cache_env, inherits = FALSE))

  # check that result looks correct for at least one parameter
  iter_aliases <- .PARAMETER_ALIASES()[["num_iterations"]]
  expect_true(is.character(iter_aliases))
  expect_true(all(c("num_round", "nrounds") %in% iter_aliases))

  # patch the cache to check that .PARAMETER_ALIASES() checks it
  assign(
    x = cache_key
    , value = list(num_iterations = c("test", "other_test"))
    , envir = .lgb_session_cache_env
  )
  iter_aliases <- .PARAMETER_ALIASES()[["num_iterations"]]
  expect_equal(iter_aliases, c("test", "other_test"))

  # re-set cache so this doesn't interfere with other unit tests
  if (exists(cache_key, envir = .lgb_session_cache_env, inherits = FALSE)) {
    rm(list = cache_key, envir = .lgb_session_cache_env)
  }
  expect_false(exists(cache_key, envir = .lgb_session_cache_env, inherits = FALSE))
})

93
test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
  # Repeat the check once for every name listed under "boosting" in the alias table
  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
    params <- list(
      num_leaves = 5L
      , learning_rate = 0.05
      , objective = "binary"
      , metric = "binary_error"
    )
    # set "dart" under the current alias name
    params[[boosting_param]] <- "dart"
    expect_warning({
      result <- lightgbm(
        data = train$data
        , label = train$label
        , params = params
        , nrounds = 5L
        , verbose = -1L
      )
    }, regexp = "Early stopping is not available in 'dart' mode")
  }
})