"examples/xendcg/README.md" did not exist on "49df9e60d742ca7be8a7aad65f9a6b2519883819"
multiclass.R 2.27 KB
Newer Older
1
2
# Attach lightgbm. library() stops with an error right here when the package
# is not installed, whereas require() would only warn and return FALSE
library(lightgbm)

3
# Load the iris data set that ships with R
data(iris)

# The multiclass objective needs numeric labels numbered from 0
# (0, 1, 2, ...), so the Species factor is recoded from its 1-based
# level codes to 0-based numbers
iris[["Species"]] <- as.numeric(as.factor(iris[["Species"]])) - 1
10

11
12
# Split 80% / 20%: the last 10 rows of each 50-row class block are held out
# for validation, and every remaining row is used for training
valid_rows <- c(41:50, 91:100, 141:150)

train <- as.matrix(iris[-valid_rows, ])
test <- as.matrix(iris[valid_rows, ])
16
17
18
# Columns 1 to 4 hold the features and column 5 holds the label
dtrain <- lgb.Dataset(
  data = train[, 1:4],
  label = train[, 5]
)

# The validation Dataset is created from dtrain rather than from scratch
dtest <- lgb.Dataset.create.valid(
  dtrain,
  data = test[, 1:4],
  label = test[, 5]
)

# Named list of validation sets handed to lgb.train()
valids <- list(test = dtest)
19

20
21
22
23
24
25
26
27
28
# Training, variant 1: objective, metric and number of classes are collected
# in a `params` list; the remaining settings go directly to lgb.train()
params <- list(
  objective = "multiclass",
  metric = "multi_error",
  num_class = 3
)
model <- lgb.train(
  params = params,
  data = dtrain,
  nrounds = 100,
  valids = valids,
  min_data = 1,
  learning_rate = 1,
  early_stopping_rounds = 10
)

# Predict on the held-out rows. The result is a vector of length 90
# (30 observations x 3 classes), ordered observation by observation:
# obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, ...
my_preds <- predict(model, test[, 1:4])

34
35
36
37
38
39
40
41
42
43
44
# Training, variant 2 (same model as variant 1): `params` is left empty and
# every setting, including objective, metric and num_class, is given
# directly in the lgb.train() call
model <- lgb.train(
  params = list(),
  data = dtrain,
  nrounds = 100,
  valids = valids,
  min_data = 1,
  learning_rate = 1,
  early_stopping_rounds = 10,
  objective = "multiclass",
  metric = "multi_error",
  num_class = 3
)

# Prediction on the held-out rows works exactly as in variant 1
my_preds <- predict(model, test[, 1:4])

49
# reshape = TRUE returns the predictions as a 30 x 3 matrix instead of a
# flat vector, with one row per observation and one column per class:
# class1 class2 class3
#   obs1   obs1   obs1
#   obs2   obs2   obs2
#   ....   ....   ....
my_preds <- predict(
  model,
  test[, 1:4],
  reshape = TRUE
)

# rawscore = TRUE returns the scores as they are before the
# Sigmoid/Softmax transformation is applied
my_preds <- predict(
  model,
  test[, 1:4],
  rawscore = TRUE
)

# Raw scores again, this time laid out as a matrix rather than a vector
my_preds <- predict(
  model,
  test[, 1:4],
  rawscore = TRUE,
  reshape = TRUE
)

# predleaf = TRUE returns leaf indices instead of scores
my_preds <- predict(
  model,
  test[, 1:4],
  predleaf = TRUE
)

# Leaf indices laid out as a matrix rather than a vector
my_preds <- predict(
  model,
  test[, 1:4],
  predleaf = TRUE,
  reshape = TRUE
)