# multiclass.R
1
library(lightgbm)
2

3
# We load the default iris dataset shipped with R
data(iris)

# We must convert factors to numeric
# They must be starting from number 0 to use multiclass
# For instance: 0, 1, 2, 3, 4, 5...
# `Species` is already a factor, so its level codes (1, 2, 3) are taken
# directly and shifted down by one to give the labels 0, 1, 2.
iris$Species <- as.numeric(iris$Species) - 1L
10

11
12
# We cut the data set into 80% train and 20% validation
# The 10 last samples of each class (rows 41-50, 91-100, 141-150) are held
# out for validation; every remaining row is used for training.
# Both matrices are derived from the same index vector so that they are
# always disjoint and together cover the whole data set.
valid_rows <- c(41L:50L, 91L:100L, 141L:150L)
train <- as.matrix(iris[-valid_rows, ])
test <- as.matrix(iris[valid_rows, ])
# Columns 1-4 of each matrix are the features, column 5 is the label.
# The validation Dataset is created from `dtrain`, then wrapped in a named
# list (name "test") for use as the `valids` argument of training.
dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
dtest <- lgb.Dataset.create.valid(
    dtrain
    , data = test[, 1L:4L]
    , label = test[, 5L]
)
valids <- list(test = dtest)
19

20
# Method 1 of training
# `num_class = 3L` matches the three label values 0, 1, 2 and
# "multi_error" is the metric evaluated on the validation set.
params <- list(
    objective = "multiclass"
    , metric = "multi_error"
    , num_class = 3L
    , min_data = 1L
    , learning_rate = 1.0
)

# Train for up to 100 rounds, evaluating on `valids` with early stopping
# set to 10 rounds. Arguments are passed by name so the call does not
# depend on the positional order of lgb.train()'s parameters.
model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 100L
    , valids = valids
    , early_stopping_rounds = 10L
)
35

36
37
# We can predict on test data, outputs a 90-length vector
# (30 validation rows x 3 classes)
# Order: obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, obs2 class3...
my_preds <- predict(model, test[, 1L:4L])
39

40
# Method 2 of training, identical
# The parameter list holds the same five entries as in Method 1, only
# written in a different order.
params <- list(
    min_data = 1L
    , learning_rate = 1.0
    , objective = "multiclass"
    , metric = "multi_error"
    , num_class = 3L
)

# Same training call as Method 1: up to 100 rounds, evaluated on `valids`,
# early stopping set to 10 rounds. Arguments are passed by name.
model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 100L
    , valids = valids
    , early_stopping_rounds = 10L
)
55

56
# We can predict on test data, identical
my_preds <- predict(model, test[, 1L:4L])

# A (30x3) matrix with the predictions, use parameter reshape
# class1 class2 class3
#   obs1   obs1   obs1
#   obs2   obs2   obs2
#   ....   ....   ....
# NOTE(review): `reshape` may not exist in newer lightgbm releases, where
# predict() takes different arguments - confirm against the installed version.
my_preds <- predict(model, test[, 1L:4L], reshape = TRUE)
65

66
# We can also get the predicted scores before the Sigmoid/Softmax application
# NOTE(review): `rawscore` may be spelled differently in newer lightgbm
# releases - confirm against the installed version.
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE)

# Raw score predictions as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
71

72
# We can also get the leaf index
# NOTE(review): `predleaf` may be spelled differently in newer lightgbm
# releases - confirm against the installed version.
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE)

# Predict leaf index as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE, reshape = TRUE)