# basic_walkthrough.R
# Attach the packages used throughout this walkthrough.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(lightgbm)
library(methods)

# We load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom is edible
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test

# The loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1}
class(train$label)
class(train$data)

#--------------------Basic Training using lightgbm----------------
# This is the basic usage of lightgbm: you can put a matrix in the data field
# Note: we are putting in sparse matrix here, lightgbm naturally handles sparse input
# Use sparse matrix when your feature is sparse (e.g. when you are using one-hot encoding vector)
print("Training lightgbm with sparseMatrix")
bst <- lightgbm(data = train$data,
                label = train$label,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                objective = "binary")

# Alternatively, you can put in dense matrix, i.e. basic R-matrix
print("Training lightgbm with Matrix")
bst <- lightgbm(data = as.matrix(train$data),
                label = train$label,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                objective = "binary")

# You can also put in lgb.Dataset object, which stores label, data and other metadata needed for advanced features
print("Training lightgbm with lgb.Dataset")
dtrain <- lgb.Dataset(data = train$data,
                      label = train$label)
bst <- lightgbm(data = dtrain,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                objective = "binary")

# Verbose = 0,1,2
print("Train lightgbm with verbose 0, no message")
bst <- lightgbm(data = dtrain,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                objective = "binary",
                verbose = 0)

print("Train lightgbm with verbose 1, print evaluation metric")
bst <- lightgbm(data = dtrain,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                nthread = 2,
                objective = "binary",
                verbose = 1)

print("Train lightgbm with verbose 2, also print information about tree")
bst <- lightgbm(data = dtrain,
                num_leaves = 4,
                learning_rate = 1,
                nrounds = 2,
                nthread = 2,
                objective = "binary",
                verbose = 2)

# You can also specify data as file path to a LibSVM/TSV/CSV format input
# Since we do not have this file with us, the following line is just for illustration
# bst <- lightgbm(data = "agaricus.train.svm", num_leaves = 4, learning_rate = 1, nrounds = 2, objective = "binary")

#--------------------Basic prediction using lightgbm--------------
# You can do prediction using the following line
# You can put in Matrix, sparseMatrix, or lgb.Dataset
pred <- predict(bst, test$data)
# Classify at a 0.5 threshold and report the fraction of misclassified test rows
err <- mean(as.numeric(pred > 0.5) != test$label)
print(paste("test-error=", err))

#--------------------Save and load models-------------------------
# Save model to binary local file
lgb.save(bst, "lightgbm.model")

# Load binary model to R
bst2 <- lgb.load("lightgbm.model")
pred2 <- predict(bst2, test$data)

# pred2 should be identical to pred
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred))))

#--------------------Advanced features ---------------------------
# To use advanced features, we need to put data in lgb.Dataset
dtrain <- lgb.Dataset(data = train$data, label = train$label, free_raw_data = FALSE)
dtest <- lgb.Dataset(data = test$data, label = test$label, free_raw_data = FALSE)

#--------------------Using validation set-------------------------
# valids is a list of lgb.Dataset, each of them is tagged with name
valids <- list(train = dtrain, test = dtest)

# To train with valids, use lgb.train, which contains more advanced features
# valids allows us to monitor the evaluation result on all data in the list
print("Train lightgbm using lgb.train with valids")
bst <- lgb.train(data = dtrain,
                 num_leaves = 4,
                 learning_rate = 1,
                 nrounds = 2,
                 valids = valids,
                 nthread = 2,
                 objective = "binary")

# We can change evaluation metrics, or use multiple evaluation metrics
print("Train lightgbm using lgb.train with valids, watch logloss and error")
bst <- lgb.train(data = dtrain,
                 num_leaves = 4,
                 learning_rate = 1,
                 nrounds = 2,
                 valids = valids,
                 eval = c("binary_error", "binary_logloss"),
                 nthread = 2,
                 objective = "binary")

# lgb.Dataset can also be saved using lgb.Dataset.save
lgb.Dataset.save(dtrain, "dtrain.buffer")

# To load it in, simply call lgb.Dataset
dtrain2 <- lgb.Dataset("dtrain.buffer")
# Train on the dataset that was reloaded from file
bst <- lgb.train(data = dtrain2,
                 num_leaves = 4,
                 learning_rate = 1,
                 nrounds = 2,
                 valids = valids,
                 nthread = 2,
                 objective = "binary")

# information can be extracted from lgb.Dataset using getinfo
label <- getinfo(dtest, "label")
pred <- predict(bst, test$data)
# Fraction of test rows whose 0.5-thresholded prediction differs from the label
err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label)
print(paste("test-error=", err))