multiclass.R 1.94 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Attach lightgbm with library() rather than require(): library() stops with
# an error right here when the package is not installed, whereas require()
# only warns and returns FALSE, so the script would instead fail later at the
# first lgb.* call.
library(lightgbm)

# Load the iris data set that ships with R.
data(iris)

# The multiclass objective expects numeric labels counted from 0
# (0, 1, 2, ...). Species is already a factor, so its integer codes 1..3
# are shifted down to 0..2.
iris$Species <- as.numeric(iris$Species) - 1

# 80% / 20% split. Each class occupies 50 consecutive rows (offsets 0, 50
# and 100): the first 40 rows of every class are used for training and the
# last 10 rows of every class are held out for validation.
train <- as.matrix(iris[rep(c(0, 50, 100), each = 40) + 1:40, ])
test <- as.matrix(iris[rep(c(40, 90, 140), each = 10) + 1:10, ])
# Wrap the matrices as LightGBM datasets: every column except the fifth is a
# feature, and the fifth column (Species) is the label.
dtrain <- lgb.Dataset(
  data = train[, -5],
  label = train[, "Species"]
)
# The validation set is created from dtrain via lgb.Dataset.create.valid().
dtest <- lgb.Dataset.create.valid(
  dtrain,
  data = test[, -5],
  label = test[, "Species"]
)
# Named list of validation sets handed to lgb.train() below.
valids <- list(test = dtest)

# Training, method 1: objective, metric and number of classes travel in a
# params list; the remaining settings are given directly to lgb.train().
params <- list(
  objective = "multiclass",
  metric = "multi_error",
  num_class = 3
)
model <- lgb.train(
  params = params,
  data = dtrain,
  nrounds = 100,
  valids = valids,
  min_data = 1,
  learning_rate = 1,
  early_stopping_rounds = 10
)

# Predict on the validation rows; the result is a 90-length vector
# (30 observations x 3 classes), ordered as:
# obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, ...
my_preds <- predict(model, test[, 1:4])

# Training, method 2 (identical to method 1): the params list stays empty
# and objective, metric and num_class are passed as extra arguments instead.
model <- lgb.train(
  params = list(),
  data = dtrain,
  nrounds = 100,
  valids = valids,
  min_data = 1,
  learning_rate = 1,
  early_stopping_rounds = 10,
  objective = "multiclass",
  metric = "multi_error",
  num_class = 3
)

# Prediction on the validation rows works exactly as for method 1.
my_preds <- predict(model, test[, 1:4])

# With reshape = TRUE the predictions come back as a (30x3) matrix:
# one row per observation, one column per class.
# class1 class2 class3
#   obs1   obs1   obs1
#   obs2   obs2   obs2
#   ....   ....   ....
my_preds <- predict(
  model,
  test[, 1:4],
  reshape = TRUE
)

# rawscore = TRUE returns the predicted scores before the Sigmoid/Softmax
# application.
my_preds <- predict(
  model,
  test[, 1:4],
  rawscore = TRUE
)

# The same raw scores, laid out as a matrix instead of a vector.
my_preds <- predict(
  model,
  test[, 1:4],
  rawscore = TRUE,
  reshape = TRUE
)

# predleaf = TRUE returns the leaf index instead of a score.
my_preds <- predict(
  model,
  test[, 1:4],
  predleaf = TRUE
)

# Predict the leaf index as a matrix instead of a vector.
my_preds <- predict(
  model,
  test[, 1:4],
  predleaf = TRUE,
  reshape = TRUE
)