# Fixture data shared by every test in this file: the mushroom (agaricus)
# binary-classification train/test sets shipped with {lightgbm}.
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")

train <- agaricus.train
test <- agaricus.test

test_that("Feature penalties work properly", {
  # Fit a series of models with varying penalty on most important variable
  var_name <- "odor=none"
  var_index <- which(train$data@Dimnames[[2L]] == var_name)

  # One model per penalty in {1.0, 0.9, ..., 0.0} applied to the target feature;
  # all other features keep penalty 1.0 (no penalty)
  bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
    feature_penalties <- rep(1.0, ncol(train$data))
    feature_penalties[var_index] <- x
    lightgbm(
      data = train$data
      , label = train$label
      , params = list(
        num_leaves = 5L
        , learning_rate = 0.05
        , objective = "binary"
        , feature_penalty = paste0(feature_penalties, collapse = ",")
        , metric = "binary_error"
        , num_threads = .LGB_MAX_THREADS
      )
      , nrounds = 5L
      , verbose = -1L
    )
  })

  # Importance of the penalized feature in each fitted model
  var_gain <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Gain])
  var_cover <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Cover])
  var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency])

  # Ensure that feature gain, cover, and frequency decreases with stronger penalties
  expect_true(all(diff(unlist(var_gain)) <= 0.0))
  expect_true(all(diff(unlist(var_cover)) <= 0.0))
  expect_true(all(diff(unlist(var_freq)) <= 0.0))

  # ... and strictly decreases at least once (the penalty has a real effect)
  expect_lt(min(diff(unlist(var_gain))), 0.0)
  expect_lt(min(diff(unlist(var_cover))), 0.0)
  expect_lt(min(diff(unlist(var_freq))), 0.0)

  # Ensure that feature is not used when feature_penalty = 0
  expect_length(var_gain[[length(var_gain)]], 0L)
})
test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
  param_aliases <- .PARAMETER_ALIASES()
  expect_identical(class(param_aliases), "list")
  expect_true(length(param_aliases) > 100L)

  # spot-check a few well-known parameters
  expect_true(is.character(names(param_aliases)))
  expect_true(is.character(param_aliases[["boosting"]]))
  expect_true(is.character(param_aliases[["early_stopping_round"]]))
  expect_true(is.character(param_aliases[["num_iterations"]]))
  expect_true(is.character(param_aliases[["pre_partition"]]))

  # every entry is named, is a character vector, and names are unique
  expect_true(length(names(param_aliases)) == length(param_aliases))
  expect_true(all(sapply(param_aliases, is.character)))
  expect_true(length(unique(names(param_aliases))) == length(param_aliases))
  expect_equal(sort(param_aliases[["task"]]), c("task", "task_type"))
  expect_equal(param_aliases[["bagging_fraction"]], c("bagging_fraction", "bagging", "sub_row", "subsample"))
})
test_that(".PARAMETER_ALIASES() uses the internal session cache", {

  cache_key <- "PARAMETER_ALIASES"

  # helper: drop the cached entry (if any) from the session cache environment
  clear_cached_aliases <- function() {
    if (exists(cache_key, where = .lgb_session_cache_env)) {
      rm(list = cache_key, envir = .lgb_session_cache_env)
    }
  }

  # clear cache, so this test isn't reliant on the order unit tests are run in
  clear_cached_aliases()
  expect_false(exists(cache_key, where = .lgb_session_cache_env))

  # check that result looks correct for at least one parameter
  iter_aliases <- .PARAMETER_ALIASES()[["num_iterations"]]
  expect_true(is.character(iter_aliases))
  expect_true(all(c("num_round", "nrounds") %in% iter_aliases))

  # patch the cache to check that .PARAMETER_ALIASES() checks it
  assign(
    x = cache_key
    , value = list(num_iterations = c("test", "other_test"))
    , envir = .lgb_session_cache_env
  )
  iter_aliases <- .PARAMETER_ALIASES()[["num_iterations"]]
  expect_equal(iter_aliases, c("test", "other_test"))

  # re-set cache so this doesn't interfere with other unit tests
  clear_cached_aliases()
  expect_false(exists(cache_key, where = .lgb_session_cache_env))
})

test_that("training should warn if you use 'dart' boosting with early stopping", {
  # exercise every alias of the 'boosting' parameter
  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
    params <- list(
      num_leaves = 5L
      , learning_rate = 0.05
      , objective = "binary"
      , metric = "binary_error"
      , num_threads = .LGB_MAX_THREADS
    )
    params[[boosting_param]] <- "dart"

    # warning: early stopping requested
    expect_warning({
      result <- lightgbm(
        data = train$data
        , label = train$label
        , params = params
        , nrounds = 2L
        , verbose = .LGB_VERBOSITY
        , early_stopping_rounds = 1L
      )
    }, regexp = "Early stopping is not available in 'dart' mode")

    # no warning: early stopping not requested
    expect_silent({
      result <- lightgbm(
        data = train$data
        , label = train$label
        , params = params
        , nrounds = 2L
        , verbose = .LGB_VERBOSITY
        , early_stopping_rounds = NULL
      )
    })
  }
})

test_that("lgb.cv() should warn if you use 'dart' boosting with early stopping", {
  # exercise every alias of the 'boosting' parameter
  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
    params <- list(
      num_leaves = 5L
      , objective = "binary"
      , metric = "binary_error"
      , num_threads = .LGB_MAX_THREADS
    )
    params[[boosting_param]] <- "dart"

    # warning: early stopping requested
    expect_warning({
      result <- lgb.cv(
        data = lgb.Dataset(
          data = train$data
          , label = train$label
        )
        , params = params
        , nrounds = 2L
        , verbose = .LGB_VERBOSITY
        , early_stopping_rounds = 1L
      )
    }, regexp = "Early stopping is not available in 'dart' mode")

    # no warning: early stopping not requested
    expect_silent({
      result <- lgb.cv(
        data = lgb.Dataset(
          data = train$data
          , label = train$label
        )
        , params = params
        , nrounds = 2L
        , verbose = .LGB_VERBOSITY
        , early_stopping_rounds = NULL
      )
    })
  }
})