Commit eded794e authored by James Lamb, committed by Qiwei Ye

[R-package] CRAN fixes (#1499)

* Fixed typos in docs

* Fixed inconsistencies in documentation

* Updated strategy for registering routines

* Fixed issues caused by smashing multiple functions into one Rd

* Fixed issues with documentation

* Removed VignetteBuilder and updated Rbuildignore

* Added R build artefacts to gitignore

* Added namespacing on data.table set function. Updated handling of CMakeLists file to get around CRAN check.

* Updated build instructions

* Added R build script

* Removed build_r.sh script and updated R-package install instructions
parent 80a9a941
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.cv.R
\name{lgb.cv}
\alias{lgb.cv}
\title{Main CV logic for LightGBM}
\usage{
lgb.cv(params = list(), data, nrounds = 10, nfold = 3, label = NULL,
  weight = NULL, obj = NULL, eval = NULL, verbose = 1, record = TRUE,
  eval_freq = 1L, showsd = TRUE, stratified = TRUE, folds = NULL,
  init_model = NULL, colnames = NULL, categorical_feature = NULL,
  early_stopping_rounds = NULL, callbacks = list(), ...)
}
\arguments{
\item{params}{List of parameters}
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{nrounds}{number of training rounds}
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{vector of response values. Should be provided only when data is an R-matrix.}
\item{weight}{vector of weights. If not NULL, will be set on the dataset}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
\item{eval}{evaluation function, can be (a list of) character or custom eval function}
\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
\item{record}{Boolean, TRUE will record iteration messages to \code{booster$record_evals}}
\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}
\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
(each element must be a vector of test fold's indices). When folds are supplied,
the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not NULL, will use this to overwrite the names in the dataset}
\item{categorical_feature}{list of str or int.
Type int represents index;
type str represents feature names}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation dataset and one metric.
If there's more than one, will check all of them except the training data.
Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have a 'best_iter' field}
\item{callbacks}{list of callback functions that are applied at each iteration}
\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
\itemize{
\item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
\item{num_leaves}{number of leaves in one tree. Defaults to 127}
\item{max_depth}{Limit the max depth of the tree model. This is used to deal with
overfitting when #data is small. The tree still grows leaf-wise.}
\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
the number of real CPU cores, not the number of threads (most
CPUs use hyper-threading to generate 2 threads per CPU core).}
}}
}
\value{
a trained model \code{lgb.CVBooster}.
}
\description{
Cross validation logic used by LightGBM
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", metric = "l2")
model <- lgb.cv(params,
                dtrain,
                10,
                nfold = 5,
                min_data = 1,
                learning_rate = 1,
                early_stopping_rounds = 10)
}
}
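Since the `folds` argument above supersedes `nfold` and `stratified`, a minimal sketch of supplying pre-defined folds may be useful; the index vectors here are illustrative, not taken from the commit:

# Pre-defined CV folds: each element is a vector of test-fold row indices.
# When `folds` is supplied, `nfold` and `stratified` are ignored.
custom_folds <- list(1:2000, 2001:4000, 4001:6513)
model <- lgb.cv(params,
                dtrain,
                10,
                folds = custom_folds,
                min_data = 1,
                learning_rate = 1)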
...@@ -9,7 +9,7 @@ lgb.model.dt.tree(model, num_iteration = NULL)
\arguments{
\item{model}{object of class \code{lgb.Booster}}
\item{num_iteration}{number of iterations you want to predict with. NULL or
<= 0 means use best iteration}
}
\value{
...@@ -26,7 +26,7 @@ The columns of the \code{data.table} are:
\item \code{leaf_index}: ID of a leaf in a tree (integer)
\item \code{leaf_parent}: ID of the parent node for the current leaf (integer)
\item \code{split_gain}: Split gain of a node
-\item \code{threshold}: Spliting threshold value of a node
+\item \code{threshold}: Splitting threshold value of a node
\item \code{decision_type}: Decision type of a node
\item \code{default_left}: Determines how to handle NA values, TRUE -> Left, FALSE -> Right
\item \code{internal_value}: Node value
...
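As a usage sketch for the function documented above (assuming a small booster trained on the bundled agaricus data; none of this appears in the commit itself):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
bst <- lgb.train(list(objective = "binary"), dtrain, 5)
# one row per node or leaf, with the columns listed above
# (split_gain, threshold, decision_type, default_left, ...)
tree_dt <- lgb.model.dt.tree(bst)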
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lgb.cv.R, R/lgb.train.R, R/lightgbm.R
+% Please edit documentation in R/lgb.train.R
-\name{lgb.cv}
-\alias{lgb.cv}
+\name{lgb.train}
\alias{lgb.train}
-\alias{lightgbm}
-\title{Main CV logic for LightGBM}
+\title{Main training logic for LightGBM}
\usage{
-lgb.cv(params = list(), data, nrounds = 10, nfold = 3, label = NULL,
-  weight = NULL, obj = NULL, eval = NULL, verbose = 1, record = TRUE,
-  eval_freq = 1L, showsd = TRUE, stratified = TRUE, folds = NULL,
-  init_model = NULL, colnames = NULL, categorical_feature = NULL,
-  early_stopping_rounds = NULL, callbacks = list(), ...)
lgb.train(params = list(), data, nrounds = 10, valids = list(),
  obj = NULL, eval = NULL, verbose = 1, record = TRUE, eval_freq = 1L,
  init_model = NULL, colnames = NULL, categorical_feature = NULL,
  early_stopping_rounds = NULL, callbacks = list(), reset_data = FALSE,
  ...)
-lightgbm(data, label = NULL, weight = NULL, params = list(),
-  nrounds = 10, verbose = 1, eval_freq = 1L,
-  early_stopping_rounds = NULL, save_name = "lightgbm.model",
-  init_model = NULL, callbacks = list(), ...)
}
\arguments{
\item{params}{List of parameters}
-\item{data}{a \code{lgb.Dataset} object, used for CV}
-\item{nrounds}{number of CV rounds}
-\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
-\item{label}{vector of response values. Should be provided only when data is an R-matrix.}
-\item{weight}{vector of response values. If not NULL, will set to dataset}
-\item{obj}{objective function, can be character or custom objective function. Examples include
-\code{regression}, \code{regression_l1}, \code{huber},
-\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
-\item{eval}{evaluation function, can be (list of) character or custom eval function}
-\item{verbose}{verbosity for output, if <= 0, also will disable the print of evalutaion during training}
-\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-\item{eval_freq}{evalutaion output frequence, only effect when verbose > 0}
-\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
-\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
-by the values of outcome labels.}
-\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
-(each element must be a vector of test fold's indices). When folds are supplied,
-the \code{nfold} and \code{stratified} parameters are ignored.}
-\item{init_model}{path of model file of \code{lgb.Booster} object, will continue train from this model}
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
-\item{categorical_feature}{list of str or int
-type int represents index,
-type str represents feature names}
-\item{early_stopping_rounds}{int
-Activates early stopping.
-CV score needs to improve at least every early_stopping_rounds round(s) to continue.
-Requires at least one metric.
-If there's more than one, will check all of them.
-Returns the model with (best_iter + early_stopping_rounds).
-If early stopping occurs, the model will have 'best_iter' field}
-\item{callbacks}{list of callback functions
-List of callback functions that are applied at each iteration.}
-\item{...}{other parameters, see Parameters.rst for more informations}
-\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
-\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets}
-\item{boosting}{boosting type. \code{gbdt}, \code{dart}}
-\item{num_leaves}{number of leaves in one tree. defaults to 127}
-\item{max_depth}{Limit the max depth for tree model. This is used to deal with overfit when #data is small.
-Tree still grow by leaf-wise.}
-\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).}
-\item{params}{List of parameters}
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{nrounds}{number of training rounds}
+\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
-\item{boosting}{boosting type. \code{gbdt}, \code{dart}}
-\item{num_leaves}{number of leaves in one tree. defaults to 127}
-\item{max_depth}{Limit the max depth for tree model. This is used to deal with overfit when #data is small.
-Tree still grow by leaf-wise.}
-\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).}
\item{eval}{evaluation function, can be (a list of) character or custom eval function}
-\item{verbose}{verbosity for output, if <= 0, also will disable the print of evalutaion during training}
+\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
\item{record}{Boolean, TRUE will record iteration messages to \code{booster$record_evals}}
-\item{eval_freq}{evalutaion output frequency, only effect when verbose > 0}
+\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
...@@ -128,26 +41,32 @@ type str represents feature names}
\item{early_stopping_rounds}{int
Activates early stopping.
-The model will train until the validation score stops improving.
-Validation score needs to improve at least every early_stopping_rounds round(s) to continue training.
-Requires at least one validation data and one metric.
-If there's more than one, will check all of them.
-Returns the model with (best_iter + early_stopping_rounds).
+Requires at least one validation dataset and one metric.
+If there's more than one, will check all of them except the training data
+(the training data is ignored in any case).
+Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have a 'best_iter' field}
\item{callbacks}{list of callback functions that are applied at each iteration}
-\item{...}{other parameters, see Parameters.rst for more informations}
+\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model, which frees up memory and the original datasets}
+\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
+\itemize{
+\item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
+\item{num_leaves}{number of leaves in one tree. Defaults to 127}
+\item{max_depth}{Limit the max depth of the tree model. This is used to deal with
+overfitting when #data is small. The tree still grows leaf-wise.}
+\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
+the number of real CPU cores, not the number of threads (most
+CPUs use hyper-threading to generate 2 threads per CPU core).}
+}}
}
\value{
-a trained model \code{lgb.CVBooster}.
a trained booster model \code{lgb.Booster}.
}
\description{
-Simple interface for training an lightgbm model.
-Its documentation is combined with lgb.train.
+Logic to train with LightGBM
}
\examples{
\dontrun{
...@@ -155,20 +74,6 @@ library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
-params <- list(objective = "regression", metric = "l2")
-model <- lgb.cv(params,
-  dtrain,
-  10,
-  nfold = 5,
-  min_data = 1,
-  learning_rate = 1,
-  early_stopping_rounds = 10)
-}
-\dontrun{
-library(lightgbm)
-data(agaricus.train, package = "lightgbm")
-train <- agaricus.train
-dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
...
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\name{lgb_shared_params}
\alias{lgb_shared_params}
\title{Shared parameter docs}
\arguments{
\item{callbacks}{list of callback functions that are applied at each iteration}
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation dataset and one metric.
If there's more than one, will check all of them except the training data.
Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have a 'best_iter' field}
\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{nrounds}{number of training rounds}
\item{params}{List of parameters}
\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
}
\description{
Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\name{lightgbm}
\alias{lightgbm}
\title{Train a LightGBM model}
\usage{
lightgbm(data, label = NULL, weight = NULL, params = list(),
  nrounds = 10, verbose = 1, eval_freq = 1L,
  early_stopping_rounds = NULL, save_name = "lightgbm.model",
  init_model = NULL, callbacks = list(), ...)
}
\arguments{
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
\item{weight}{vector of weights. If not NULL, will be set on the dataset}
\item{params}{List of parameters}
\item{nrounds}{number of training rounds}
\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation dataset and one metric.
If there's more than one, will check all of them except the training data.
Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have a 'best_iter' field}
\item{save_name}{File name to use when writing the trained model to disk. Should end in ".model".}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{callbacks}{list of callback functions that are applied at each iteration}
\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
\itemize{
\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
\item{eval}{evaluation function, can be (a list of) character or custom eval function}
\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{categorical_feature}{list of str or int. type int represents index, type str represents feature names}
\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}
\item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
\item{num_leaves}{number of leaves in one tree. Defaults to 127}
\item{max_depth}{Limit the max depth of the tree model. This is used to deal with
overfitting when #data is small. The tree still grows leaf-wise.}
\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
the number of real CPU cores, not the number of threads (most
CPUs use hyper-threading to generate 2 threads per CPU core).}
}}
}
\description{
Simple interface for training a LightGBM model.
}
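The Rd above carries no \examples block; a minimal usage sketch of the wrapper (parameter values are illustrative only):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
# lightgbm() accepts a matrix plus `label`, builds the lgb.Dataset itself,
# and forwards everything else to lgb.train()
bst <- lightgbm(data = train$data,
                label = train$label,
                params = list(objective = "binary"),
                nrounds = 10)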
...@@ -5,8 +5,8 @@
\title{Predict method for LightGBM model}
\usage{
\method{predict}{lgb.Booster}(object, data, num_iteration = NULL,
-  rawscore = FALSE, predleaf = FALSE, header = FALSE, reshape = FALSE,
-  ...)
+  rawscore = FALSE, predleaf = FALSE, predcontrib = FALSE,
+  header = FALSE, reshape = FALSE, ...)
}
\arguments{
\item{object}{Object of class \code{lgb.Booster}}
...@@ -21,6 +21,8 @@ logistic regression would result in predictions for log-odds instead of probabilities
\item{predleaf}{whether to predict leaf indices instead.}
+\item{predcontrib}{return per-feature contributions for each record.}
\item{header}{only used for prediction from a text file. TRUE if the text file has a header}
\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
...
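Since the diff adds `predcontrib`, a hedged sketch of how it might be called (assuming `bst` is a trained lgb.Booster and `test$data` the agaricus test matrix; the output shape is an assumption based on the per-feature-plus-bias layout LightGBM uses):

# per-record feature contributions: one column per feature plus a
# final bias column
contrib <- predict(bst, test$data, predcontrib = TRUE, reshape = TRUE)
dim(contrib)  # expected: nrow(test$data) x (ncol(test$data) + 1)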
...@@ -15,7 +15,7 @@ readRDS.lgb.Booster(file = "", refhook = NULL)
lgb.Booster.
}
\description{
-Attemps to load a model using RDS.
+Attempts to load a model using RDS.
}
\examples{
\dontrun{
...
...@@ -26,7 +26,7 @@ saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
NULL invisibly.
}
\description{
-Attemps to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
+Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
}
\examples{
\dontrun{
...
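A short round-trip sketch with the two helpers documented above (assuming `bst` is a trained lgb.Booster; the file name is illustrative):

# save with the lightgbm-aware RDS writer, then load it back
saveRDS.lgb.Booster(bst, file = "lightgbm.rds")
bst2 <- readRDS.lgb.Booster(file = "lightgbm.rds")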
...@@ -21,7 +21,7 @@ constructed sub dataset
}
\description{
Get a new \code{lgb.Dataset} containing the specified rows of
-orginal lgb.Dataset object
+original lgb.Dataset object
}
\examples{
\dontrun{
...
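A one-line usage sketch of the accessor documented above (assuming `dtrain` is an existing lgb.Dataset; the row indices are illustrative):

# new lgb.Dataset backed by rows 1..42 of the original dataset
dsub <- slice(dtrain, 1:42)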
// Register Dynamic Symbols
#include <R.h>
#include <Rinternals.h>
#include <R_ext/Rdynload.h>
#include "R_init.h"
void R_init_lightgbm(DllInfo* info) {
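  // No static routine table is registered here; enabling dynamic symbol lookup
  // lets R resolve this package's native entry points by name at load time.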
R_registerRoutines(info, NULL, NULL, NULL, NULL);
R_useDynamicSymbols(info, TRUE);
}
// Register Dynamic Symbols
#ifndef R_INIT_LIGHTGBM_H
#define R_INIT_LIGHTGBM_H
void R_init_lightgbm(DllInfo* info);
#endif // R_INIT_LIGHTGBM_H
...@@ -14,6 +14,12 @@ if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262"
  || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")){
  print("Warning: unmatched R_INTERNALS_UUID, may not run normally.")
}
+# Move in CMakeLists.txt
+if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)){
+  stop("Copying CMakeLists failed")
+}
# Check for precompilation
if (!use_precompile) {
...@@ -21,26 +27,6 @@ if (!use_precompile) {
  source_dir <- file.path(R_PACKAGE_SOURCE, "src", fsep = "/")
  setwd(source_dir)
-  if (!file.exists("_IS_FULL_PACKAGE")) {
-    unlink("./include", recursive = TRUE)
-    unlink("./src", recursive = TRUE)
-    unlink("./compute", recursive = TRUE)
-    unlink("./build", recursive = TRUE)
-    if (!file.copy("./../../include", "./", overwrite = TRUE, recursive = TRUE)) {
-      stop("Cannot find folder LightGBM/include")
-    }
-    if (!file.copy("./../../src", "./", overwrite = TRUE, recursive = TRUE)) {
-      stop("Cannot find folder LightGBM/src")
-    }
-    if (!file.copy("./../../compute", "./", overwrite = TRUE, recursive = TRUE)) {
-      print("Cannot find folder LightGBM/compute, disabling GPU build.")
-      use_gpu <- FALSE
-    }
-    if (!file.copy("./../../CMakeLists.txt", "./", overwrite = TRUE, recursive = TRUE)) {
-      stop("Cannot find file LightGBM/CMakeLists.txt")
-    }
-  }
  # Prepare building package
  build_dir <- file.path(source_dir, "build", fsep = "/")
  dir.create(build_dir, recursive = TRUE, showWarnings = FALSE)
...
...@@ -62,7 +62,7 @@ test_that("lgb.Dataset: colnames", {
test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
  nr <- 1000
-  x <- rsparsematrix(nr, 100, density = 0.0005)
+  x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
  # we want it very sparse, so that last rows are empty
  expect_lt(max(x@i), nr)
  dtest <- lgb.Dataset(x)
...
# for macOS (replace 8 with the version of gcc installed on your machine)
# NOTE: your gcc / g++ from Homebrew is probably in /usr/local/bin
#export CXX=/usr/local/bin/g++-8 CC=/usr/local/bin/gcc-8
# Sys.setenv("CXX" = "/usr/local/bin/g++-8")
# Sys.setenv("CC" = "/usr/local/bin/gcc-8")
# R returns FALSE (rather than a non-zero exit code) when a file copy
# operation fails, so wrap copies in a helper that stops on failure
.handle_result <- function(res){
  if (!res){
    stop("Copying files failed!")
  }
}
# Make a new temporary folder to work in
unlink(x = "lightgbm_r", recursive = TRUE)
dir.create("lightgbm_r")
# copy in the relevant files
result <- file.copy(
from = "R-package/./"
, to = "lightgbm_r/"
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(
from = "include/"
, to = file.path("lightgbm_r", "src/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(
from = "src/"
, to = file.path("lightgbm_r", "src/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(
from = "CMakeLists.txt"
, to = file.path("lightgbm_r", "inst", "bin/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
# rebuild documentation
devtools::document(
pkg = "lightgbm_r/"
)
# Build the package
# NOTE: --keep-empty-dirs is necessary to keep the deep paths expected
# by CMake while also meeting the CRAN req to create object files
# on demand
devtools::build(
pkg = "lightgbm_r"
, args = c("--keep-empty-dirs")
)
# Install the package
version <- gsub(
"Version: "
, ""
, grep(
"Version: "
, readLines(con = file.path("lightgbm_r", "DESCRIPTION"))
, value = TRUE
)
)
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
system(sprintf("R CMD INSTALL %s --no-multi-arch", tarball))
# Run R CMD check
#R CMD check lightgbm_2.1.2.tar.gz --as-cran | tee check.log | cat
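If you prefer to stay in the same R session, a minimal sketch using the `tarball` path computed above:

# --as-cran applies the additional checks CRAN runs on submission
system(sprintf("R CMD check %s --as-cran", tarball))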
...@@ -1096,7 +1096,7 @@ void GPUTreeLearner::FindBestSplits() {
void GPUTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
  const SplitInfo& best_split_info = best_split_per_leaf_[best_Leaf];
#if GPU_DEBUG >= 2
-  printf("Spliting leaf %d with feature %d thresh %d gain %f stat %f %f %f %f\n", best_Leaf, best_split_info.feature, best_split_info.threshold, best_split_info.gain, best_split_info.left_sum_gradient, best_split_info.right_sum_gradient, best_split_info.left_sum_hessian, best_split_info.right_sum_hessian);
+  printf("Splitting leaf %d with feature %d thresh %d gain %f stat %f %f %f %f\n", best_Leaf, best_split_info.feature, best_split_info.threshold, best_split_info.gain, best_split_info.left_sum_gradient, best_split_info.right_sum_gradient, best_split_info.left_sum_hessian, best_split_info.right_sum_hessian);
#endif
  SerialTreeLearner::Split(tree, best_Leaf, left_leaf, right_leaf);
  if (Network::num_machines() == 1) {
...