Commit b0017e5b authored by Laurae, committed by Guolin Ke

[R-package] Add weight-parameter adjustment example (#416)

* Add weight adjustment tutorial

* Update 00Index

* Update README to reflect changes

* Add linear separation
parent 45c1c6e8
@@ -4,3 +4,4 @@ early_stopping Early Stop in training
cross_validation Cross Validation
multiclass Multiclass training/prediction
leaf_stability Leaf (in)Stability example
weight_param Weight-Parameter adjustment relationship
@@ -6,3 +6,4 @@ LightGBM R examples
* [Cross Validation](cross_validation.R)
* [Multiclass Training/Prediction](multiclass.R)
* [Leaf (in)Stability](leaf_stability.R)
* [Weight-Parameter Adjustment Relationship](weight_param.R)
# This R demo shows how hyperparameters must be adjusted when observation
# weights are rescaled, so that the model keeps learning properly.
# As with any optimizer, badly scaled parameters can impair performance.
# Load library
# Load library
library(lightgbm)
# We will train a model under the following three scenarios:
# - Run 1: sum of weights equal to 0.06513, without adjusted regularization (not learning)
# - Run 2: sum of weights equal to 0.06513, with adjusted regularization (learning)
# - Run 3: sum of weights equal to 6513 (1e5 times larger), with adjusted regularization (learning)
# Set up small weights (1e-5 per observation)
weights1 <- rep(1/100000, 6513)
weights2 <- rep(1/100000, 1611)
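# Quick sanity check (illustrative addition, not part of the original demo):
# each weight vector sums to the value quoted in the scenarios above
sum(weights1)  # 6513 * 1e-5 = 0.06513
sum(weights2)  # 1611 * 1e-5 = 0.01611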
# Load data and create datasets
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label, weight = weights1)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label, weight = weights2)
valids <- list(test = dtest)
# Run 1: sum of weights equal to 0.06513, without adjusted regularization (not learning)
# The model cannot learn because the regularization is too strong:
# min_sum_hessian alone exceeds the total sum of weights, so no split can ever be made
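# A minimal check (illustrative addition, assuming the l2 objective, where each
# observation's hessian equals its weight): the whole dataset carries less
# hessian than the min_sum_hessian = 10 threshold used below
sum(weights1) < 10  # TRUE: the split constraint can never be satisfied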
params <- list(objective = "regression",
               metric = "l2",
               device = "cpu",
               min_sum_hessian = 10,
               num_leaves = 7,
               max_depth = 3,
               nthread = 1)
model <- lgb.train(params,
                   dtrain,
                   50,
                   valids,
                   min_data = 1,
                   learning_rate = 1,
                   early_stopping_rounds = 10)
weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(weight_loss) # Shows how poor the learning was: a straight line!
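# Hedged numeric confirmation (illustrative addition): the recorded losses
# barely move across iterations, which is why the plot is a flat line
diff(range(weight_loss))  # expected to be ~0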
# Run 2: sum of weights equal to 0.06513, with adjusted regularization (learning)
# Adjusting the regularization simply means rescaling min_sum_hessian by the
# same factor as the weights: 10 * 1e-5 = 1e-4
# Notice how it learns now: there is no issue once we adjust the regularization ourselves
params <- list(objective = "regression",
               metric = "l2",
               device = "cpu",
               min_sum_hessian = 1e-4,
               num_leaves = 7,
               max_depth = 3,
               nthread = 1)
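# Quick check (illustrative addition): the rescaled threshold is now well below
# the total sum of weights, so splits are possible again
params$min_sum_hessian < sum(weights1)  # TRUE: 1e-4 < 0.06513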
model <- lgb.train(params,
                   dtrain,
                   50,
                   valids,
                   min_data = 1,
                   learning_rate = 1,
                   early_stopping_rounds = 10)
small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(small_weight_loss) # It learns!
# Run 3: sum of weights equal to 6513 (1e5 times larger), with adjusted regularization (learning)
# For a clean comparison, we first wipe the LightGBM objects from the environment and reload the package
lgb.unloader(wipe = TRUE)
# Then we proceed as usual, this time with the default unit weights
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
valids <- list(test = dtest)
# Set up parameters and run the model
params <- list(objective = "regression",
               metric = "l2",
               device = "cpu",
               min_sum_hessian = 10,
               num_leaves = 7,
               max_depth = 3,
               nthread = 1)
model <- lgb.train(params,
                   dtrain,
                   50,
                   valids,
                   min_data = 1,
                   learning_rate = 1,
                   early_stopping_rounds = 10)
large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(large_weight_loss) # It learns!
# Do you want to compare the learning? Plot one loss against the other:
# points falling on the y = x identity line mean both runs converged identically
plot(small_weight_loss, large_weight_loss)
curve(1 * x, from = 0, to = 0.02, add = TRUE)
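# Hedged final check (illustrative addition): if min_sum_hessian was rescaled
# consistently with the weights, the two runs are equivalent problems and their
# loss curves should match almost exactly, up to floating-point noise
all.equal(small_weight_loss, large_weight_loss, tolerance = 1e-6)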