Commit 535cdc65 authored by Guolin Ke

[R-package] update examples

parent 551d59ca
R-package/DESCRIPTION
@@ -33,5 +33,6 @@ Depends:
     R (>= 3.0),
     R6
 Imports:
-    Matrix (>= 1.1-0)
+    Matrix (>= 1.1-0),
+    methods
 RoxygenNote: 5.0.1
\ No newline at end of file
R-package/NAMESPACE
@@ -22,5 +22,6 @@ export(lgb.train)
 export(lightgbm)
 export(setinfo)
 export(slice)
+import(methods)
 importFrom(R6,R6Class)
 useDynLib(lightgbm)
R-package/R/lgb.train.R
@@ -95,7 +95,7 @@ lgb.train <- function(params=list(), data, nrounds=10,
     data$set_colnames(colnames)
   }
   data$set_categorical_feature(categorical_feature)
+  data$construct()
   vaild_contain_train <- FALSE
   train_data_name <- "train"
   reduced_valid_sets <- list()
R-package/R/lightgbm.R
@@ -9,8 +9,10 @@ lightgbm <- function(data, label = NULL, weight = NULL,
                      early_stopping_rounds = NULL,
                      save_name = "lightgbm.model",
                      init_model = NULL, callbacks = list(), ...) {
-  dtrain <- lgb.Dataset(data, label=label, weight=weight)
+  dtrain <- data
+  if(!lgb.is.Dataset(dtrain)) {
+    dtrain <- lgb.Dataset(data, label=label, weight=weight)
+  }
   valids <- list()
   if (verbose > 0)
@@ -78,6 +80,7 @@ NULL
 NULL
 # Various imports
+#' @import methods
 #' @importFrom R6 R6Class
 #' @useDynLib lightgbm
 NULL
\ No newline at end of file
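With this change, `lightgbm()` accepts either raw training data (a dense matrix or a sparse dgCMatrix) or an already-constructed `lgb.Dataset`; the wrapper only builds a Dataset when handed raw data. A minimal sketch of the two call styles, mirroring the demos below (parameter values are illustrative only):

```r
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

# style 1: raw data plus label; lightgbm() wraps them in an lgb.Dataset internally
bst1 <- lightgbm(data = train$data, label = train$label,
                 num_leaves = 4, learning_rate = 1, nrounds = 2,
                 objective = "binary")

# style 2: a pre-built lgb.Dataset; the label is already attached,
# so lgb.is.Dataset() is TRUE and no second Dataset is constructed
dtrain <- lgb.Dataset(data = train$data, label = train$label)
bst2 <- lightgbm(data = dtrain, num_leaves = 4, learning_rate = 1,
                 nrounds = 2, objective = "binary")
```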
R-package/README.md
@@ -3,7 +3,15 @@ LightGBM R Package
 Installation
 ------------
 
 ```
 cd R-package
 R CMD INSTALL --build .
 ```
+
+For Windows users, you may need to run the command prompt as administrator.
+
+Examples
+--------
+
+* Please visit [demo](demo).
\ No newline at end of file
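Once the package is installed, the demo scripts can also be listed and launched from an R session with base R's `demo()` function; a minimal sketch (assuming the demo files ship in the installed package, as the new demo/00Index below suggests):

```r
# list the demos bundled with the lightgbm package
demo(package = "lightgbm")

# run one of them, e.g. the basic feature walkthrough
demo("basic_walkthrough", package = "lightgbm")
```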
R-package/demo/00Index (new file)
basic_walkthrough       Basic feature walkthrough
boost_from_prediction   Boosting from existing prediction
early_stopping          Early stopping in training
R-package/demo/README.md (new file)
LightGBM R examples
====
* [Basic walkthrough of wrappers](basic_walkthrough.R)
* [Boosting from existing prediction](boost_from_prediction.R)
* [Early Stopping](early_stopping.R)
R-package/demo/basic_walkthrough.R (new file)
require(lightgbm)
require(methods)
# we load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom is edible
data(agaricus.train, package='lightgbm')
data(agaricus.test, package='lightgbm')
train <- agaricus.train
test <- agaricus.test
# the loaded data is stored as a sparse matrix, and the label is a numeric vector in {0,1}
class(train$label)
class(train$data)
#-------------Basic Training using lightgbm-----------------
# this is the basic usage of lightgbm: you can put a matrix in the data field
# note: we are putting in a sparse matrix here; lightgbm naturally handles sparse input
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
print("Training lightgbm with sparseMatrix")
bst <- lightgbm(data = train$data, label = train$label, num_leaves = 4, learning_rate = 1, nrounds = 2,
objective = "binary")
# alternatively, you can put in a dense matrix, i.e. a basic R matrix
print("Training lightgbm with Matrix")
bst <- lightgbm(data = as.matrix(train$data), label = train$label, num_leaves = 4, learning_rate = 1, nrounds = 2,
objective = "binary")
# you can also put in an lgb.Dataset object, which stores the label, data and other metadata needed for advanced features
print("Training lightgbm with lgb.Dataset")
dtrain <- lgb.Dataset(data = train$data, label = train$label)
bst <- lightgbm(data = dtrain, num_leaves = 4, learning_rate = 1, nrounds = 2,
objective = "binary")
# Verbose = 0,1,2
print("Train lightgbm with verbose 0, no message")
bst <- lightgbm(data = dtrain, num_leaves = 4, learning_rate = 1, nrounds = 2,
objective = "binary", verbose = 0)
print("Train lightgbm with verbose 1, print evaluation metric")
bst <- lightgbm(data = dtrain, num_leaves = 4, learning_rate = 1, nrounds = 2,
nthread = 2, objective = "binary", verbose = 1)
print("Train lightgbm with verbose 2, also print information about tree")
bst <- lightgbm(data = dtrain, num_leaves = 4, learning_rate = 1, nrounds = 2,
nthread = 2, objective = "binary", verbose = 2)
# you can also specify data as a file path to a LibSVM/TSV/CSV format input
# since we do not have this file with us, the following line is just for illustration
# bst <- lightgbm(data = 'agaricus.train.svm', num_leaves = 4, learning_rate = 1, nrounds = 2,objective = "binary")
#--------------------basic prediction using lightgbm--------------
# you can do prediction using the following line
# you can put in Matrix, sparseMatrix, or lgb.Dataset
pred <- predict(bst, test$data)
err <- mean(as.numeric(pred > 0.5) != test$label)
print(paste("test-error=", err))
#-------------------save and load models-------------------------
# save model to binary local file
lgb.save(bst, "lightgbm.model")
# load binary model to R
bst2 <- lgb.load("lightgbm.model")
pred2 <- predict(bst2, test$data)
# pred2 should be identical to pred
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2-pred))))
#----------------Advanced features --------------
# to use advanced features, we need to put data in lgb.Dataset
dtrain <- lgb.Dataset(data = train$data, label=train$label, free_raw_data=FALSE)
dtest <- lgb.Dataset(data = test$data, label=test$label, free_raw_data=FALSE)
#---------------Using valids----------------
# valids is a list of lgb.Dataset objects, each tagged with a name
valids <- list(train=dtrain, test=dtest)
# to train with valids, use lgb.train, which contains more advanced features
# valids allows us to monitor the evaluation result on all data in the list
print("Train lightgbm using lgb.train with valids")
bst <- lgb.train(data=dtrain, num_leaves=4, learning_rate=1, nrounds=2, valids=valids,
nthread = 2, objective = "binary")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train lightgbm using lgb.train with valids, watch logloss and error")
bst <- lgb.train(data=dtrain, num_leaves=4, learning_rate=1, nrounds=2, valids=valids,
eval = c("binary_error","binary_logloss"),
nthread = 2, objective = "binary")
# lgb.Dataset can also be saved using lgb.Dataset.save
lgb.Dataset.save(dtrain, "dtrain.buffer")
# to load it in, simply call lgb.Dataset
dtrain2 <- lgb.Dataset("dtrain.buffer")
bst <- lgb.train(data=dtrain2, num_leaves=4, learning_rate=1, nrounds=2, valids=valids,
nthread = 2, objective = "binary")
# information can be extracted from lgb.Dataset using getinfo
label <- getinfo(dtest, "label")
pred <- predict(bst, test$data)
err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
R-package/demo/boost_from_prediction.R (new file)
require(lightgbm)
require(methods)
# load in the agaricus dataset
data(agaricus.train, package='lightgbm')
data(agaricus.test, package='lightgbm')
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
valids <- list(eval = dtest, train = dtrain)
###
# advanced: start from an initial base prediction
#
print('start running example to start from an initial prediction')
# train lightgbm for 1 round
param <- list(num_leaves=4, learning_rate=1, nthread = 2, silent=1, objective='binary')
bst <- lgb.train(param, dtrain, 1, valids=valids)
# Note: we need the margin value instead of the transformed prediction for init_score
ptrain <- predict(bst, agaricus.train$data, rawscore=TRUE)
ptest <- predict(bst, agaricus.test$data, rawscore=TRUE)
# set the init_score property of dtrain and dtest
# base margin is the base prediction we will boost from
setinfo(dtrain, "init_score", ptrain)
setinfo(dtest, "init_score", ptest)
print('this is the result of boosting from the initial prediction')
bst <- lgb.train(params = param, data = dtrain, nrounds = 5, valids = valids)
R-package/demo/early_stopping.R (new file)
require(lightgbm)
require(methods)
# load in the agaricus dataset
data(agaricus.train, package='lightgbm')
data(agaricus.test, package='lightgbm')
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
# note: for a customized objective function, we leave objective as default
# note: what we are getting is the margin value in prediction
# you must know what you are doing
param <- list(num_leaves=4, learning_rate=1)
valids <- list(eval = dtest)
num_round <- 20
# user-defined objective function: given predictions, return the gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: returns a triple of metric name, result, and higher_better
# NOTE: when using a customized loss function, the default prediction value is the margin
# this may make the built-in evaluation metrics not function properly
# for example, with logistic loss, the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you use the customization, as you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(name = "error", value = err, higher_better=FALSE))
}
print('start training with early stopping setting')
bst <- lgb.train(param, dtrain, num_round, valids,
objective = logregobj, eval = evalerror,
early_stopping_round = 3)
README.md
@@ -16,10 +16,12 @@ For more details, please refer to [Features](https://github.com/Microsoft/LightG
 News
 ----
+01/08/2017 : Release [**R-package**](./R-package) beta version, welcome to have a try and provide feedback.
+
 12/05/2016 : **Categorical Features as input directly** (without one-hot coding). Experiment on [Expo data](http://stat-computing.org/dataexpo/2009/) shows about 8x speed-up with the same accuracy compared with one-hot coding (refer to [categorical log](https://github.com/guolinke/boosting_tree_benchmarks/blob/master/lightgbm/lightgbm_dataexpo_speed.log) and [one-hot log](https://github.com/guolinke/boosting_tree_benchmarks/blob/master/lightgbm/lightgbm_dataexpo_onehot_speed.log)).
 For the setting details, please refer to [IO Parameters](./docs/Parameters.md#io-parameters).
-12/02/2016 : Release [**python-package**](./python-package) beta version, welcome to have a try and provide issues and feedback.
+12/02/2016 : Release [**python-package**](./python-package) beta version, welcome to have a try and provide feedback.
 
 Get Started And Documents
 -------------------------
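The categorical-feature support highlighted in the news also surfaces in this commit's R changes: the lgb.train hunk above forwards a `categorical_feature` argument to the Dataset via `data$set_categorical_feature()`. A hedged sketch of how that argument could be used (the column indices and the exact argument placement are assumptions for illustration, not confirmed by this diff):

```r
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

dtrain <- lgb.Dataset(data = train$data, label = train$label)
param <- list(objective = "binary", num_leaves = 4, learning_rate = 1)

# declare columns 1 and 2 as categorical (indices chosen purely for illustration);
# LightGBM can then split on raw category values instead of one-hot encodings
bst <- lgb.train(params = param, data = dtrain, nrounds = 2,
                 categorical_feature = c(1, 2))
```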