Commit b6c973af authored by Laurae, committed by Guolin Ke
Browse files

[R-package] Improvements, readability, and bug fixes (#378)

* Define environment in examples (xgboost clash)

* Large R code changes
parent e9275fb9
require(lightgbm) require(lightgbm)
require(methods) require(methods)
# load in the agaricus dataset
data(agaricus.train, package='lightgbm') # Load in the agaricus dataset
data(agaricus.test, package='lightgbm') data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label) dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction # Note: for customized objective function, we leave objective as default
# you must know what you are doing # Note: what we are getting is margin value in prediction
param <- list(num_leaves=4, learning_rate=1) # You must know what you are doing
param <- list(num_leaves = 4,
learning_rate = 1)
valids <- list(eval = dtest) valids <- list(eval = dtest)
num_round <- 20 num_round <- 20
# user-defined objective function: given predictions, return the gradient and second-order gradient
# this is the log-likelihood loss # User-defined objective function: given predictions, return the gradient and second-order gradient
# This is the log-likelihood loss
logregobj <- function(preds, dtrain) { logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds)) preds <- 1 / (1 + exp(-preds))
grad <- preds - labels grad <- preds - labels
hess <- preds * (1 - preds) hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess)) return(list(grad = grad, hess = hess))
} }
# user-defined evaluation function, returns a list of metric name, result, higher_better
# User-defined evaluation function, returns a list of metric name, result, higher_better
# NOTE: when you do customized loss function, the default prediction value is margin # NOTE: when you do customized loss function, the default prediction value is margin
# this may make built-in evaluation metrics not function properly # This may make built-in evaluation metrics not function properly
# for example, we are doing logistic loss, so the prediction is the score before the logistic transformation # For example, we are doing logistic loss, so the prediction is the score before the logistic transformation
# the built-in evaluation error assumes input is after logistic transformation # The built-in evaluation error assumes input is after logistic transformation
# Keep this in mind when you use the customization; you may need to write a customized evaluation function # Keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) { evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels) err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
return(list(name = "error", value = err, higher_better=FALSE)) return(list(name = "error", value = err, higher_better = FALSE))
} }
print ('start training with early Stopping setting') print("Start training with early Stopping setting")
bst <- lgb.train(param, dtrain, num_round, valids, bst <- lgb.train(param,
objective = logregobj, eval = evalerror, dtrain,
num_round,
valids,
objective = logregobj,
eval = evalerror,
early_stopping_round = 3) early_stopping_round = 3)
require(lightgbm) require(lightgbm)
# we load the default iris dataset shipped with R # We load the default iris dataset shipped with R
data(iris) data(iris)
# we must convert factors to numeric # We must convert factors to numeric
# they must be starting from number 0 to use multiclass # They must be starting from number 0 to use multiclass
# for instance: 0, 1, 2, 3, 4, 5... # For instance: 0, 1, 2, 3, 4, 5...
iris$Species <- as.numeric(as.factor(iris$Species))-1 iris$Species <- as.numeric(as.factor(iris$Species)) - 1
# we cut the data set into 80% train and 20% validation # We cut the data set into 80% train and 20% validation
# the 10 last samples of each class are for validation # The 10 last samples of each class are for validation
train <- as.matrix(iris[c(1:40, 51:90, 101:140), ]) train <- as.matrix(iris[c(1:40, 51:90, 101:140), ])
test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
dtrain <- lgb.Dataset(data=train[, 1:4], label=train[, 5]) dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
dtest <- lgb.Dataset.create.valid(dtrain, data=test[, 1:4], label=test[, 5]) dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
valids <- list(test=dtest) valids <- list(test = dtest)
# method 1 of training # Method 1 of training
params <- list(objective="multiclass", metric="multi_error", num_class=3) params <- list(objective = "multiclass", metric = "multi_error", num_class = 3)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
# we can predict on test data, outputs a 90-length vector # We can predict on test data, outputs a 90-length vector
# order: obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, obs2 class3... # Order: obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, obs2 class3...
my_preds <- predict(model, test[, 1:4]) my_preds <- predict(model, test[, 1:4])
# method 2 of training, identical # Method 2 of training, identical
model <- lgb.train(list(), dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10, objective="multiclass", metric="multi_error", num_class=3) model <- lgb.train(list(),
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10,
objective = "multiclass",
metric = "multi_error",
num_class = 3)
# we can predict on test data, identical # We can predict on test data, identical
my_preds <- predict(model, test[, 1:4]) my_preds <- predict(model, test[, 1:4])
# a (30x3) matrix with the predictions, use parameter reshape # A (30x3) matrix with the predictions, use parameter reshape
# class1 class2 class3 # class1 class2 class3
# obs1 obs1 obs1 # obs1 obs1 obs1
# obs2 obs2 obs2 # obs2 obs2 obs2
# .... .... .... # .... .... ....
my_preds <- predict(model, test[, 1:4], reshape=TRUE) my_preds <- predict(model, test[, 1:4], reshape = TRUE)
# we can also get the predicted scores before the Sigmoid/Softmax application # We can also get the predicted scores before the Sigmoid/Softmax application
my_preds <- predict(model, test[, 1:4], rawscore=TRUE) my_preds <- predict(model, test[, 1:4], rawscore = TRUE)
# raw score predictions as matrix instead of vector # Raw score predictions as matrix instead of vector
my_preds <- predict(model, test[, 1:4], rawscore=TRUE, reshape=TRUE) my_preds <- predict(model, test[, 1:4], rawscore = TRUE, reshape = TRUE)
# we can also get the leaf index # We can also get the leaf index
my_preds <- predict(model, test[, 1:4], predleaf=TRUE) my_preds <- predict(model, test[, 1:4], predleaf = TRUE)
# preddict leaf index as matrix instead of vector # Predict leaf index as matrix instead of vector
my_preds <- predict(model, test[, 1:4], predleaf=TRUE, reshape=TRUE) my_preds <- predict(model, test[, 1:4], predleaf = TRUE, reshape = TRUE)
...@@ -22,14 +22,16 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also ...@@ -22,14 +22,16 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{lgb.Dataset} object. be directly used with an \code{lgb.Dataset} object.
} }
\examples{ \examples{
dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
stopifnot(nrow(dtrain) == nrow(train$data)) stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data)) stopifnot(ncol(dtrain) == ncol(train$data))
stopifnot(all(dim(dtrain) == dim(train$data))) stopifnot(all(dim(dtrain) == dim(train$data)))
} }
} }
...@@ -24,15 +24,17 @@ Generic \code{dimnames} methods are used by \code{colnames}. ...@@ -24,15 +24,17 @@ Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly. Since row names are irrelevant, it is recommended to use \code{colnames} directly.
} }
\examples{ \examples{
dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.construct(dtrain) dtrain <- lgb.Dataset(train$data, label = train$label)
dimnames(dtrain) lgb.Dataset.construct(dtrain)
colnames(dtrain) dimnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data)) colnames(dtrain)
print(dtrain, verbose=TRUE) colnames(dtrain) <- make.names(1:ncol(train$data))
print(dtrain, verbose = TRUE)
} }
} }
...@@ -34,15 +34,18 @@ The \code{name} field can be one of the following: ...@@ -34,15 +34,18 @@ The \code{name} field can be one of the following:
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.construct(dtrain) dtrain <- lgb.Dataset(train$data, label = train$label)
labels <- getinfo(dtrain, 'label') lgb.Dataset.construct(dtrain)
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label') labels <- lightgbm::getinfo(dtrain, "label")
stopifnot(all(labels2 == 1-labels)) lightgbm::setinfo(dtrain, "label", 1 - labels)
labels2 <- lightgbm::getinfo(dtrain, "label")
stopifnot(all(labels2 == 1 - labels))
} }
} }
...@@ -33,12 +33,14 @@ or local file (that was created previously by saving an \code{lgb.Dataset}). ...@@ -33,12 +33,14 @@ or local file (that was created previously by saving an \code{lgb.Dataset}).
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.save(dtrain, 'lgb.Dataset.data') dtrain <- lgb.Dataset(train$data, label = train$label)
dtrain <- lgb.Dataset('lgb.Dataset.data') lgb.Dataset.save(dtrain, "lgb.Dataset.data")
lgb.Dataset.construct(dtrain) dtrain <- lgb.Dataset("lgb.Dataset.data")
lgb.Dataset.construct(dtrain)
} }
} }
...@@ -14,10 +14,12 @@ Construct Dataset explicitly ...@@ -14,10 +14,12 @@ Construct Dataset explicitly
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.construct(dtrain) dtrain <- lgb.Dataset(train$data, label = train$label)
lgb.Dataset.construct(dtrain)
} }
} }
...@@ -23,12 +23,14 @@ Construct validation data according to training data ...@@ -23,12 +23,14 @@ Construct validation data according to training data
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
data(agaricus.test, package='lightgbm') dtrain <- lgb.Dataset(train$data, label = train$label)
test <- agaricus.test data(agaricus.test, package = "lightgbm")
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label) test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
} }
} }
...@@ -18,11 +18,14 @@ passed dataset ...@@ -18,11 +18,14 @@ passed dataset
Save \code{lgb.Dataset} to a binary file Save \code{lgb.Dataset} to a binary file
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.save(dtrain, "data.bin") dtrain <- lgb.Dataset(train$data, label = train$label)
lgb.Dataset.save(dtrain, "data.bin")
} }
} }
...@@ -19,12 +19,14 @@ Set categorical feature of \code{lgb.Dataset} ...@@ -19,12 +19,14 @@ Set categorical feature of \code{lgb.Dataset}
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
lgb.Dataset.save(dtrain, 'lgb.Dataset.data') dtrain <- lgb.Dataset(train$data, label = train$label)
dtrain <- lgb.Dataset('lgb.Dataset.data') lgb.Dataset.save(dtrain, "lgb.Dataset.data")
lgb.Dataset.set.categorical(dtrain, 1:2) dtrain <- lgb.Dataset("lgb.Dataset.data")
lgb.Dataset.set.categorical(dtrain, 1:2)
} }
} }
...@@ -19,13 +19,15 @@ If you want to use validation data, you should set reference to training data ...@@ -19,13 +19,15 @@ If you want to use validation data, you should set reference to training data
} }
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package='lightgbm') library(lightgbm)
train <- agaricus.train data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(train$data, label=train$label) train <- agaricus.train
data(agaricus.test, package='lightgbm') dtrain <- lgb.Dataset(train$data, label = train$label)
test <- agaricus.test data(agaricus.test, package = "lightgbm")
dtest <- lgb.Dataset(test$data, label=test$label) test <- agaricus.test
lgb.Dataset.set.reference(dtest, dtrain) dtest <- lgb.Dataset(test$data, label = test$label)
lgb.Dataset.set.reference(dtest, dtrain)
} }
} }
...@@ -19,17 +19,24 @@ Dump LightGBM model to json ...@@ -19,17 +19,24 @@ Dump LightGBM model to json
} }
\examples{ \examples{
\dontrun{ \dontrun{
library(lightgbm) library(lightgbm)
data(agaricus.train, package='lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package='lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label) dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective="regression", metric="l2") params <- list(objective = "regression", metric = "l2")
valids <- list(test=dtest) valids <- list(test = dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.train(params,
json_model <- lgb.dump(model) dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
json_model <- lgb.dump(model)
} }
} }
...@@ -24,8 +24,9 @@ For a tree model, a \code{data.table} with the following columns: ...@@ -24,8 +24,9 @@ For a tree model, a \code{data.table} with the following columns:
Creates a \code{data.table} of feature importances in a model. Creates a \code{data.table} of feature importances in a model.
} }
\examples{ \examples{
\dontrun{
data(agaricus.train, package = 'lightgbm') library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
...@@ -37,6 +38,7 @@ model <- lgb.train(params, dtrain, 20) ...@@ -37,6 +38,7 @@ model <- lgb.train(params, dtrain, 20)
tree_imp1 <- lgb.importance(model, percentage = TRUE) tree_imp1 <- lgb.importance(model, percentage = TRUE)
tree_imp2 <- lgb.importance(model, percentage = FALSE) tree_imp2 <- lgb.importance(model, percentage = FALSE)
}
} }
...@@ -27,14 +27,15 @@ For multiclass classification, a \code{list} of \code{data.table} with the Featu ...@@ -27,14 +27,15 @@ For multiclass classification, a \code{list} of \code{data.table} with the Featu
Computes feature contribution components of rawscore prediction. Computes feature contribution components of rawscore prediction.
} }
\examples{ \examples{
\dontrun{
library(lightgbm)
Sigmoid <- function(x) 1 / (1 + exp(-x)) Sigmoid <- function(x) 1 / (1 + exp(-x))
Logit <- function(x) log(x / (1 - x)) Logit <- function(x) log(x / (1 - x))
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label))) setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
params = list(objective = "binary", params = list(objective = "binary",
...@@ -44,6 +45,7 @@ params = list(objective = "binary", ...@@ -44,6 +45,7 @@ params = list(objective = "binary",
model <- lgb.train(params, dtrain, 20) model <- lgb.train(params, dtrain, 20)
tree_interpretation <- lgb.interprete(model, test$data, 1:5) tree_interpretation <- lgb.interprete(model, test$data, 1:5)
}
} }
...@@ -17,18 +17,25 @@ Load LightGBM model from saved model file ...@@ -17,18 +17,25 @@ Load LightGBM model from saved model file
} }
\examples{ \examples{
\dontrun{ \dontrun{
library(lightgbm) library(lightgbm)
data(agaricus.train, package='lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package='lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label) dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective="regression", metric="l2") params <- list(objective = "regression", metric = "l2")
valids <- list(test=dtest) valids <- list(test = dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.train(params,
lgb.save(model, "model.txt") dtrain,
load_booster <- lgb.load("model.txt") 100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
lgb.save(model, "model.txt")
load_booster <- lgb.load("model.txt")
} }
} }
...@@ -18,7 +18,7 @@ The columns of the \code{data.table} are: ...@@ -18,7 +18,7 @@ The columns of the \code{data.table} are:
\item \code{tree_index}: ID of a tree in a model (integer) \item \code{tree_index}: ID of a tree in a model (integer)
\item \code{split_index}: ID of a node in a tree (integer) \item \code{split_index}: ID of a node in a tree (integer)
\item \code{split_feature}: for a node, it's a feature name (character); \item \code{split_feature}: for a node, it's a feature name (character);
for a leaf, it simply labels it as \code{'NA'} for a leaf, it simply labels it as \code{"NA"}
\item \code{node_parent}: ID of the parent node for current node (integer) \item \code{node_parent}: ID of the parent node for current node (integer)
\item \code{leaf_index}: ID of a leaf in a tree (integer) \item \code{leaf_index}: ID of a leaf in a tree (integer)
\item \code{leaf_parent}: ID of the parent node for current leaf (integer) \item \code{leaf_parent}: ID of the parent node for current leaf (integer)
...@@ -35,8 +35,10 @@ The columns of the \code{data.table} are: ...@@ -35,8 +35,10 @@ The columns of the \code{data.table} are:
Parse a LightGBM model json dump into a \code{data.table} structure. Parse a LightGBM model json dump into a \code{data.table} structure.
} }
\examples{ \examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
...@@ -47,6 +49,7 @@ params = list(objective = "binary", ...@@ -47,6 +49,7 @@ params = list(objective = "binary",
model <- lgb.train(params, dtrain, 20) model <- lgb.train(params, dtrain, 20)
tree_dt <- lgb.model.dt.tree(model) tree_dt <- lgb.model.dt.tree(model)
}
} }
...@@ -30,8 +30,8 @@ The graph represents each feature as a horizontal bar of length proportional to ...@@ -30,8 +30,8 @@ The graph represents each feature as a horizontal bar of length proportional to
Features are shown ranked in a decreasing importance order. Features are shown ranked in a decreasing importance order.
} }
\examples{ \examples{
\dontrun{
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
...@@ -43,6 +43,7 @@ model <- lgb.train(params, dtrain, 20) ...@@ -43,6 +43,7 @@ model <- lgb.train(params, dtrain, 20)
tree_imp <- lgb.importance(model, percentage = TRUE) tree_imp <- lgb.importance(model, percentage = TRUE)
lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain")
}
} }
...@@ -19,7 +19,7 @@ lgb.plot.interpretation(tree_interpretation_dt, top_n = 10, cols = 1, ...@@ -19,7 +19,7 @@ lgb.plot.interpretation(tree_interpretation_dt, top_n = 10, cols = 1,
\item{cex}{(base R barplot) passed as \code{cex.names} parameter to \code{barplot}.} \item{cex}{(base R barplot) passed as \code{cex.names} parameter to \code{barplot}.}
} }
\value{ \value{
The \code{lgb.plot.interpretation} function creates a \code{barplot} The \code{lgb.plot.interpretation} function creates a \code{barplot}.
} }
\description{ \description{
Plot previously calculated feature contribution as a bar graph. Plot previously calculated feature contribution as a bar graph.
...@@ -29,14 +29,15 @@ The graph represents each feature as a horizontal bar of length proportional to ...@@ -29,14 +29,15 @@ The graph represents each feature as a horizontal bar of length proportional to
Features are shown ranked in a decreasing contribution order. Features are shown ranked in a decreasing contribution order.
} }
\examples{ \examples{
\dontrun{
Sigmoid <- function(x) 1 / (1 + exp(-x)) library(lightgbm)
Logit <- function(x) log(x / (1 - x)) Sigmoid <- function(x) {1 / (1 + exp(-x))}
data(agaricus.train, package = 'lightgbm') Logit <- function(x) {log(x / (1 - x))}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label))) setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
params = list(objective = "binary", params = list(objective = "binary",
...@@ -47,6 +48,7 @@ model <- lgb.train(params, dtrain, 20) ...@@ -47,6 +48,7 @@ model <- lgb.train(params, dtrain, 20)
tree_interpretation <- lgb.interprete(model, test$data, 1:5) tree_interpretation <- lgb.interprete(model, test$data, 1:5)
lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
}
} }
...@@ -21,17 +21,24 @@ Save LightGBM model ...@@ -21,17 +21,24 @@ Save LightGBM model
} }
\examples{ \examples{
\dontrun{ \dontrun{
library(lightgbm) library(lightgbm)
data(agaricus.train, package='lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package='lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label) dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective="regression", metric="l2") params <- list(objective = "regression", metric = "l2")
valids <- list(test=dtest) valids <- list(test = dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.train(params,
lgb.save(model, "model.txt") dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
lgb.save(model, "model.txt")
} }
} }
...@@ -149,24 +149,37 @@ Its documentation is combined with lgb.train. ...@@ -149,24 +149,37 @@ Its documentation is combined with lgb.train.
} }
\examples{ \examples{
\dontrun{ \dontrun{
library(lightgbm) library(lightgbm)
data(agaricus.train, package='lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective="regression", metric="l2") params <- list(objective = "regression", metric = "l2")
model <- lgb.cv(params, dtrain, 10, nfold=5, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.cv(params,
dtrain,
10,
nfold = 5,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
} }
\dontrun{ \dontrun{
library(lightgbm) library(lightgbm)
data(agaricus.train, package='lightgbm') data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package='lightgbm') data(agaricus.test, package = "lightgbm")
test <- agaricus.test test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label) dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective="regression", metric="l2") params <- list(objective = "regression", metric = "l2")
valids <- list(test=dtest) valids <- list(test = dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10) model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment