# Cross-validation examples for LightGBM, using the bundled
# agaricus (mushroom) binary-classification dataset.
library(lightgbm)

# Load the agaricus training and test sets shipped with the package
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")

dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label)

# Shared training configuration for every lgb.cv() call below
nrounds <- 2L
param <- list(
  num_leaves = 4L,
  learning_rate = 1.0,
  objective = "binary"
)
print("Running cross validation")
# Cross-validation prints one line per iteration in the form
#   [iteration]  metric_name:mean_value+std_value
# where std_value is the standard deviation of the metric across folds
lgb.cv(
  param,
  dtrain,
  nrounds,
  nfold = 5L,
  eval = "binary_error"
)
print("Running cross validation, disable standard deviation display")
# Same cross-validation as above, but showsd = FALSE suppresses the
# "+std_value" part so only the fold mean of each metric is printed
lgb.cv(
  param,
  dtrain,
  nrounds,
  nfold = 5L,
  eval = "binary_error",
  showsd = FALSE
)
# You can also do cross validation with a customized loss function
print("Running cross validation, with customized loss function")
# Customized objective for binary logistic regression.
# `preds` arrive as raw margin scores; apply the sigmoid first, then
# return the first (grad) and second (hess) derivatives of the log
# loss with respect to the margin, as the list LightGBM expects.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  prob <- 1.0 / (1.0 + exp(-preds))
  list(
    grad = prob - labels,
    hess = prob * (1.0 - prob)
  )
}
# User-defined evaluation function: returns list(name, value, higher_better).
# NOTE: when using a customized objective, the predictions passed in are
# raw margin scores, not probabilities, so built-in evaluation metrics
# would be computed on the wrong scale. For logistic loss we therefore
# apply the sigmoid ourselves before thresholding at 0.5.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  prob <- 1.0 / (1.0 + exp(-preds))
  misclassified <- sum(labels != (prob > 0.5))
  list(
    name = "error",
    value = as.numeric(misclassified) / length(labels),
    higher_better = FALSE
  )
}
# Train with the customized objective and evaluation functions defined above
lgb.cv(
  params = param,
  data = dtrain,
  nrounds = nrounds,
  nfold = 5L,
  obj = logregobj,
  eval = evalerror
)