Commit 846e8954 authored by James Lamb, committed by GitHub

[R-package] fix warnings in demos (#4569)



* [R-package] fix warnings in demos

* Apply suggestions from code review
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

* fix additional params issues in multiclass and categorical_feature examples

* Update R-package/demo/multiclass.R
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent b4213e96
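
The recurring change in the demo diffs below is to collect training parameters into a single named `params` list instead of passing them as extra top-level arguments, which `lightgbm()` and `lgb.train()` forward through `...` and warn about. A minimal before/after sketch of the pattern, assuming the `agaricus.train` data shipped with the package (not copied verbatim from any one demo):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

# Before: parameters passed as top-level arguments fall into `...`;
# in the LightGBM releases this commit targets, that emits a warning
# saying such pass-through parameters will eventually become an error.
bst_warns <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 2L
    , num_leaves = 4L
    , learning_rate = 1.0
    , objective = "binary"
)

# After: the same parameters routed through `params` train the same
# model without the warning.
train_params <- list(
    num_leaves = 4L
    , learning_rate = 1.0
    , objective = "binary"
)
bst_clean <- lightgbm(
    data = train$data
    , params = train_params
    , label = train$label
    , nrounds = 2L
)
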
@@ -326,7 +326,7 @@ coverage.xml
 .hypothesis/
 **/coverage.html
 **/coverage.html.zip
-R-package/tests/testthat/Rplots.pdf
+**/Rplots.pdf

 # Translations
 *.mo
@@ -427,6 +427,7 @@ miktex*.zip
 *.def

 # Files created by examples and tests
+*.buffer
 **/lgb-Dataset.data
 **/lgb.Dataset.data
 **/model.txt
...
@@ -12,6 +12,14 @@ test <- agaricus.test
 class(train$label)
 class(train$data)

+# Set parameters for model training
+train_params <- list(
+    num_leaves = 4L
+    , learning_rate = 1.0
+    , objective = "binary"
+    , nthread = 2L
+)
+
 #--------------------Basic Training using lightgbm----------------
 # This is the basic usage of lightgbm you can put matrix in data field
 # Note: we are putting in sparse matrix here, lightgbm naturally handles sparse input
@@ -19,22 +27,18 @@ class(train$data)
 print("Training lightgbm with sparseMatrix")
 bst <- lightgbm(
     data = train$data
+    , params = train_params
     , label = train$label
-    , num_leaves = 4L
-    , learning_rate = 1.0
     , nrounds = 2L
-    , objective = "binary"
 )

 # Alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("Training lightgbm with Matrix")
 bst <- lightgbm(
     data = as.matrix(train$data)
+    , params = train_params
     , label = train$label
-    , num_leaves = 4L
-    , learning_rate = 1.0
     , nrounds = 2L
-    , objective = "binary"
 )

 # You can also put in lgb.Dataset object, which stores label, data and other meta datas needed for advanced features
@@ -45,42 +49,32 @@ dtrain <- lgb.Dataset(
 )
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , objective = "binary"
 )

 # Verbose = 0,1,2
 print("Train lightgbm with verbose 0, no message")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , objective = "binary"
     , verbose = 0L
 )

 print("Train lightgbm with verbose 1, print evaluation metric")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , nthread = 2L
-    , objective = "binary"
     , verbose = 1L
 )

 print("Train lightgbm with verbose 2, also print information about tree")
 bst <- lightgbm(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
-    , nthread = 2L
-    , objective = "binary"
     , verbose = 2L
 )
@@ -126,25 +120,19 @@ valids <- list(train = dtrain, test = dtest)
 print("Train lightgbm using lgb.train with valids")
 bst <- lgb.train(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
-    , nthread = 2L
-    , objective = "binary"
 )

 # We can change evaluation metrics, or use multiple evaluation metrics
 print("Train lightgbm using lgb.train with valids, watch logloss and error")
 bst <- lgb.train(
     data = dtrain
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
     , eval = c("binary_error", "binary_logloss")
-    , nthread = 2L
-    , objective = "binary"
 )

 # lgb.Dataset can also be saved using lgb.Dataset.save
@@ -154,12 +142,9 @@ lgb.Dataset.save(dtrain, "dtrain.buffer")
 dtrain2 <- lgb.Dataset("dtrain.buffer")
 bst <- lgb.train(
     data = dtrain2
-    , num_leaves = 4L
-    , learning_rate = 1.0
+    , params = train_params
     , nrounds = 2L
     , valids = valids
-    , nthread = 2L
-    , objective = "binary"
 )

 # information can be extracted from lgb.Dataset using getinfo
...
@@ -85,7 +85,6 @@ params <- list(
     , metric = "l2"
     , min_data = 1L
     , learning_rate = 0.1
-    , min_data = 0L
     , min_hessian = 1.0
     , max_depth = 2L
 )
...
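
The hunk above drops a `min_data` entry that appeared twice in the same list. R happily builds lists with duplicated names, so the mistake is easy to make and easy to miss; which of the two values LightGBM then honours is an implementation detail. A small illustration (hypothetical values, not from the demo):

params <- list(min_data = 1L, min_data = 0L)
names(params)         # "min_data" "min_data": both entries survive
params[["min_data"]]  # 1: `[[` only ever returns the first match
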
@@ -46,7 +46,7 @@ bst <- lgb.train(
     , dtrain
     , num_round
     , valids
-    , objective = logregobj
+    , obj = logregobj
     , eval = evalerror
     , early_stopping_round = 3L
 )
@@ -85,18 +85,21 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)

 # setup parameters and we train a model
-params <- list(objective = "regression", metric = "l2")
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 0.1
+    , bagging_fraction = 0.1
+    , bagging_freq = 1L
+    , bagging_seed = 1L
+)
 valids <- list(test = dtest)
 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 0.1
-    , bagging_fraction = 0.1
-    , bagging_freq = 1L
-    , bagging_seed = 1L
 )

 # We create a data.frame with the following structure:
@@ -141,13 +144,17 @@ table(new_data$binned)
 .depth_density_plot(df = new_data)

 # Now, let's show with other parameters
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model2 <- lgb.train(
     params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
 )

 # We create the data structure, but for model2
@@ -193,13 +200,17 @@ table(new_data2$binned)
 .depth_density_plot(df = new_data2)

 # Now, try with very severe overfitting
+params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model3 <- lgb.train(
     params
     , dtrain
     , 1000L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
 )

 # We create the data structure, but for model3
...
@@ -18,14 +18,18 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5
 valids <- list(test = dtest)

 # Method 1 of training
-params <- list(objective = "multiclass", metric = "multi_error", num_class = 3L)
+params <- list(
+    objective = "multiclass"
+    , metric = "multi_error"
+    , num_class = 3L
+    , min_data = 1L
+    , learning_rate = 1.0
+)
 model <- lgb.train(
     params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )
@@ -34,17 +38,19 @@ model <- lgb.train(
 my_preds <- predict(model, test[, 1L:4L])

 # Method 2 of training, identical
+params <- list(
+    min_data = 1L
+    , learning_rate = 1.0
+    , objective = "multiclass"
+    , metric = "multi_error"
+    , num_class = 3L
+)
 model <- lgb.train(
-    list()
+    params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
-    , objective = "multiclass"
-    , metric = "multi_error"
-    , num_class = 3L
 )

 # We can predict on test data, identical
...
@@ -20,18 +20,20 @@ valids <- list(train = dtrain, test = dtest)
 # Method 1 of training with built-in multiclass objective
 # Note: need to turn off boost from average to match custom objective
 # (https://github.com/microsoft/LightGBM/issues/1846)
+params <- list(
+    min_data = 1L
+    , learning_rate = 1.0
+    , num_class = 3L
+    , boost_from_average = FALSE
+    , metric = "multi_logloss"
+)
 model_builtin <- lgb.train(
-    list()
+    params
     , dtrain
-    , boost_from_average = FALSE
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
-    , objective = "multiclass"
-    , metric = "multi_logloss"
-    , num_class = 3L
+    , obj = "multiclass"
 )

 preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
@@ -92,17 +94,19 @@ custom_multiclass_metric <- function(preds, dtrain) {
     ))
 }

+params <- list(
+    min_data = 1L
+    , learning_rate = 1.0
+    , num_class = 3L
+)
 model_custom <- lgb.train(
-    list()
+    params
     , dtrain
     , 100L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
-    , objective = custom_multiclass_obj
+    , obj = custom_multiclass_obj
     , eval = custom_multiclass_metric
-    , num_class = 3L
 )

 preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
...
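
The custom-objective hunks above also rename `objective = <function>` to `obj = <function>`: `lgb.train()` accepts a custom objective only through its `obj` argument, while `objective` falls into `...` and is treated as a pass-through parameter, producing the same warning the commit removes elsewhere. A minimal sketch of the corrected calling convention, mirroring the demos' `logregobj` binary log-loss objective (assumes the agaricus data; not copied verbatim from any demo):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

# Gradient and hessian of binary log-loss with respect to the raw scores
logregobj <- function(preds, dtrain) {
    labels <- getinfo(dtrain, "label")
    preds <- 1.0 / (1.0 + exp(-preds))
    grad <- preds - labels
    hess <- preds * (1.0 - preds)
    return(list(grad = grad, hess = hess))
}

bst <- lgb.train(
    params = list(num_leaves = 4L, learning_rate = 1.0)
    , data = dtrain
    , nrounds = 2L
    , obj = logregobj
)
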
@@ -34,14 +34,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_rate = 1.0
 )

 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )

 weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -58,14 +58,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_rate = 1.0
 )

 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )

 small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -94,14 +94,14 @@ params <- list(
     , num_leaves = 7L
     , max_depth = 3L
     , nthread = 1L
+    , min_data = 1L
+    , learning_rate = 1.0
 )

 model <- lgb.train(
     params
     , dtrain
     , 50L
     , valids
-    , min_data = 1L
-    , learning_rate = 1.0
     , early_stopping_rounds = 10L
 )

 large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
...