# basic_walkthrough.R
# Attach the packages used throughout this walkthrough.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less obvious message.
library(lightgbm)
library(methods)

# We load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom is edible
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")

# Shorter aliases used by the rest of the script
train <- agaricus.train
test <- agaricus.test

# The loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1}
class(train$label)
class(train$data)

#--------------------Basic Training using lightgbm----------------
# This is the basic usage of lightgbm: you can put a matrix in the data field
# Note: we are putting in a sparse matrix here, lightgbm naturally handles sparse input
# Use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
print("Training lightgbm with sparseMatrix")
bst <- lightgbm(
    data = train$data
    , label = train$label
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , objective = "binary"
)

# Alternatively, you can put in a dense matrix, i.e. a basic R matrix
print("Training lightgbm with Matrix")
bst <- lightgbm(
    data = as.matrix(train$data)
    , label = train$label
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , objective = "binary"
)

# You can also put in an lgb.Dataset object, which stores the label, the data and
# other metadata needed for advanced features
print("Training lightgbm with lgb.Dataset")
dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
)
# The label travels inside dtrain, so it is not passed separately here
bst <- lightgbm(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , objective = "binary"
)

# The verbose argument takes the values 0, 1 or 2; each level is demonstrated in turn
print("Train lightgbm with verbose 0, no message")
bst <- lightgbm(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , objective = "binary"
    , verbose = 0L
)

# Same training call as before, with verbose = 1L
print("Train lightgbm with verbose 1, print evaluation metric")
bst <- lightgbm(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , nthread = 2L
    , objective = "binary"
    , verbose = 1L
)

# Same training call as before, with verbose = 2L
print("Train lightgbm with verbose 2, also print information about tree")
bst <- lightgbm(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , nthread = 2L
    , objective = "binary"
    , verbose = 2L
)

# You can also specify data as a file path to a LibSVM/TSV/CSV format input
# Since we do not have this file with us, the following lines are just for illustration
# bst <- lightgbm(
#     data = "agaricus.train.svm"
#     , num_leaves = 4L
#     , learning_rate = 1.0
#     , nrounds = 2L
#     , objective = "binary"
# )

#--------------------Basic prediction using lightgbm--------------
# You can do prediction using the following line
# You can put in Matrix, sparseMatrix, or lgb.Dataset
pred <- predict(bst, test$data)

# Threshold the predictions at 0.5 and report the share that disagree with the labels
err <- mean(as.numeric(pred > 0.5) != test$label)
print(paste("test-error=", err))

#--------------------Save and load models-------------------------
# Save the model to a local file
lgb.save(bst, "lightgbm.model")

# Load the saved model back into R and predict with it
bst2 <- lgb.load("lightgbm.model")
pred2 <- predict(bst2, test$data)

# pred2 should be identical to pred, so the printed sum is expected to be 0
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred))))

#--------------------Advanced features ---------------------------
# To use advanced features, we need to put data in lgb.Dataset
dtrain <- lgb.Dataset(data = train$data, label = train$label, free_raw_data = FALSE)

# The validation set is created from dtrain rather than as a standalone lgb.Dataset
dtest <- lgb.Dataset.create.valid(dtrain, data = test$data, label = test$label)

#--------------------Using validation set-------------------------
# valids is a list of lgb.Dataset objects, each of them tagged with a name
valids <- list(train = dtrain, test = dtest)

# To train with valids, use lgb.train, which contains more advanced features
# valids allows us to monitor the evaluation result on all data in the list
print("Train lightgbm using lgb.train with valids")
bst <- lgb.train(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , valids = valids
    , nthread = 2L
    , objective = "binary"
)

# We can change evaluation metrics, or use multiple evaluation metrics
print("Train lightgbm using lgb.train with valids, watch logloss and error")
bst <- lgb.train(
    data = dtrain
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , valids = valids
    , eval = c("binary_error", "binary_logloss")
    , nthread = 2L
    , objective = "binary"
)

# lgb.Dataset can also be saved using lgb.Dataset.save
lgb.Dataset.save(dtrain, "dtrain.buffer")

# To load it in, simply call lgb.Dataset with the path of the saved file
dtrain2 <- lgb.Dataset("dtrain.buffer")

# Train on the reloaded dataset, monitoring the same validation sets as before
bst <- lgb.train(
    data = dtrain2
    , num_leaves = 4L
    , learning_rate = 1.0
    , nrounds = 2L
    , valids = valids
    , nthread = 2L
    , objective = "binary"
)

# Information can be extracted from an lgb.Dataset using getinfo
label <- getinfo(dtest, "label")
pred <- predict(bst, test$data)

# Share of test rows whose thresholded prediction differs from the stored label
err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label)
print(paste("test-error=", err))