Commit 551d59ca authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

R package (#168)

* finish R's c_api

* clean code

* fix sizeof pointer in 32bit system.

* add predictor class

* add Dataset class

* format code

* add booster

* add type check for expose function

* add a simple callback

* add all callbacks

* finish the basic training logic

* update docs

* add an simple training interface

* add basic test

* adapt the changes in c_api

* add test for Dataset

* add test for custom obj/eval functions

* fix python test

* fix bug in metadata init

* fix R CMD check
parent acbd4f34
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.create.valid}
\alias{lgb.Dataset.create.valid}
\title{Contruct a validation data}
\usage{
lgb.Dataset.create.valid(dataset, data, info = list(), ...)
}
\arguments{
\item{dataset}{\code{lgb.Dataset} object, training data}
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{info}{a list of information of the lgb.Dataset object}
\item{...}{other information to pass to \code{info}.}
}
\value{
constructed dataset
}
\description{
Contruct a validation data according to training data
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.save}
\alias{lgb.Dataset.save}
\title{save \code{lgb.Dataset} to binary file}
\usage{
lgb.Dataset.save(dataset, fname)
}
\arguments{
\item{dataset}{object of class \code{lgb.Dataset}}
\item{fname}{object filename of output file}
}
\value{
passed dataset
}
\description{
save \code{lgb.Dataset} to binary file
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.save(dtrain, "data.bin")
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.set.categorical}
\alias{lgb.Dataset.set.categorical}
\title{set categorical feature of \code{lgb.Dataset}}
\usage{
lgb.Dataset.set.categorical(dataset, categorical_feature)
}
\arguments{
\item{dataset}{object of class \code{lgb.Dataset}}
\item{categorical_feature}{categorical features}
}
\value{
passed dataset
}
\description{
set categorical feature of \code{lgb.Dataset}
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.save(dtrain, 'lgb.Dataset.data')
dtrain <- lgb.Dataset('lgb.Dataset.data')
lgb.Dataset.set.categorical(dtrain, 1:2)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.set.reference}
\alias{lgb.Dataset.set.reference}
\title{set reference of \code{lgb.Dataset}}
\usage{
lgb.Dataset.set.reference(dataset, reference)
}
\arguments{
\item{dataset}{object of class \code{lgb.Dataset}}
\item{reference}{object of class \code{lgb.Dataset}}
}
\value{
passed dataset
}
\description{
set reference of \code{lgb.Dataset}.
If you want to use validation data, you should set its reference to training data
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset(test$data, test=train$label)
lgb.Dataset.set.reference(dtest, dtrain)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.dump}
\alias{lgb.dump}
\title{Dump LightGBM model to json}
\usage{
lgb.dump(booster, num_iteration = NULL)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
}
\value{
json format of model
}
\description{
Dump LightGBM model to json
}
\examples{
library(lightgbm)
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
params <- list(objective="regression", metric="l2")
valids <- list(test=dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
json_model <- lgb.dump(model)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.get.eval.result}
\alias{lgb.get.eval.result}
\title{Get record evaluation result from booster}
\usage{
lgb.get.eval.result(booster, data_name, eval_name, iters = NULL,
is_err = FALSE)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{data_name}{name of dataset}
\item{eval_name}{name of evaluation}
\item{iters}{iterations, NULL will return all}
\item{is_err}{TRUE will return evaluation error instead}
}
\value{
vector of evaluation result
}
\description{
Get record evaluation result from booster
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.load}
\alias{lgb.load}
\title{Load LightGBM model}
\usage{
lgb.load(filename)
}
\arguments{
\item{filename}{path of model file}
}
\value{
booster
}
\description{
Load LightGBM model from saved model file
}
\examples{
library(lightgbm)
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
params <- list(objective="regression", metric="l2")
valids <- list(test=dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
lgb.save(model, "model.txt")
load_booster <- lgb.load("model.txt")
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.save}
\alias{lgb.save}
\title{Save LightGBM model}
\usage{
lgb.save(booster, filename, num_iteration = NULL)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{filename}{saved filename}
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
}
\value{
booster
}
\description{
Save LightGBM model
}
\examples{
library(lightgbm)
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
params <- list(objective="regression", metric="l2")
valids <- list(test=dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
lgb.save(model, "model.txt")
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.train.R, R/lightgbm.R
\name{lgb.train}
\alias{lgb.train}
\alias{lightgbm}
\title{Main training logic for LightGBM}
\usage{
lgb.train(params = list(), data, nrounds = 10, valids = list(),
obj = NULL, eval = NULL, verbose = 1, eval_freq = 1L,
init_model = NULL, colnames = NULL, categorical_feature = NULL,
early_stopping_rounds = NULL, callbacks = list(), ...)
lightgbm(data, label = NULL, weight = NULL, params = list(),
nrounds = 10, verbose = 1, eval_freq = 1L,
early_stopping_rounds = NULL, save_name = "lightgbm.model",
init_model = NULL, callbacks = list(), ...)
}
\arguments{
\item{params}{List of parameters}
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{nrounds}{number of training rounds}
\item{valids}{a list of \code{lgb.Dataset} object, used for validation}
\item{obj}{objective function, can be character or custom objective function}
\item{eval}{evaluation function, can be (list of) character or custom eval function}
\item{verbose}{verbosity for output
if verbose > 0 , also will record iteration message to booster$record_evals}
\item{eval_freq}{evalutaion output frequence}
\item{init_model}{path of model file of \code{lgb.Booster} object, will continue train from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{categorical_feature}{list of str or int
type int represents index,
type str represents feature names}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation data and one metric
If there's more than one, will check all of them
Returns the model with (best_iter + early_stopping_rounds)
If early stopping occurs, the model will have 'best_iter' field}
\item{callbacks}{list of callback functions
List of callback functions that are applied at each iteration.}
\item{...}{other parameters, see parameters.md for more informations}
}
\value{
a trained booster model \code{lgb.Booster}.
}
\description{
Main training logic for LightGBM
}
\examples{
library(lightgbm)
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
params <- list(objective="regression", metric="l2")
valids <- list(test=dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{predict.lgb.Booster}
\alias{predict.lgb.Booster}
\title{Predict method for LightGBM model}
\usage{
\method{predict}{lgb.Booster}(object, data, num_iteration = NULL,
rawscore = FALSE, predleaf = FALSE, header = FALSE, reshape = FALSE)
}
\arguments{
\item{object}{Object of class \code{lgb.Booster}}
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
\item{rawscore}{whether the prediction should be returned in the for of original untransformed
sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
logistic regression would result in predictions for log-odds instead of probabilities.}
\item{predleaf}{whether predict leaf index instead.}
\item{header}{only used for prediction for text file. True if text file has header}
\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
prediction outputs per case.}
}
\value{
For regression or binary classification, it returns a vector of length \code{nrows(data)}.
For multiclass classification, either a \code{num_class * nrows(data)} vector or
a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees.
}
\description{
Predicted values based on class \code{lgb.Booster}
}
\examples{
library(lightgbm)
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
data(agaricus.test, package='lightgbm')
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
params <- list(objective="regression", metric="l2")
valids <- list(test=dtest)
model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
preds <- predict(model, test$data)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{setinfo}
\alias{setinfo}
\alias{setinfo.lgb.Dataset}
\title{Set information of an lgb.Dataset object}
\usage{
setinfo(dataset, ...)
\method{setinfo}{lgb.Dataset}(dataset, name, info, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{...}{other parameters}
\item{name}{the name of the field to get}
\item{info}{the specific field of information to set}
}
\value{
passed object
}
\description{
Set information of an lgb.Dataset object
}
\details{
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: label lightgbm learn from ;
\item \code{weight}: to do a weight rescale ;
\item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
\item \code{group}.
}
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label')
stopifnot(all.equal(labels2, 1-labels))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{slice}
\alias{slice}
\alias{slice.lgb.Dataset}
\title{Slice an dataset}
\usage{
slice(dataset, ...)
\method{slice}{lgb.Dataset}(dataset, idxset, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{...}{other parameters (currently not used)}
\item{idxset}{a integer vector of indices of rows needed}
}
\value{
constructed sub dataset
}
\description{
Get a new Dataset containing the specified rows of
orginal lgb.Dataset object
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
dsub <- slice(dtrain, 1:42)
labels1 <- getinfo(dsub, 'label')
}
# package root
PKGROOT=../../
ENABLE_STD_THREAD=1
CXX_STD = CXX11
LGBM_RFLAGS = -DUSE_SOCKET
PKG_CPPFLAGS= -I$(PKGROOT)/include $(LGBM_RFLAGS)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) -std=c++11
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
OBJECTS = ./lightgbm-all.o ./lightgbm_R.o
# package root
PKGROOT=../../
ENABLE_STD_THREAD=1
CXX_STD = CXX11
LGBM_RFLAGS = -DUSE_SOCKET
PKG_CPPFLAGS= -I$(PKGROOT)/include $(LGBM_RFLAGS)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) -std=c++11
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) -lws2_32 -liphlpapi
OBJECTS = ./lightgbm-all.o ./lightgbm_R.o
/*
* A simple wrapper for access data in R object.
* Due to license issue(GPLv2), we cannot include R's header file, so use this simple wrapper instead.
* However, If R change its define of object, this file need to be updated as well.
*/
#ifndef R_OBJECT_HELPER_H_
#define R_OBJECT_HELPER_H_
#include <cstdint>
#define TYPE_BITS 5
struct sxpinfo_struct {
unsigned int type : 5;
unsigned int obj : 1;
unsigned int named : 2;
unsigned int gp : 16;
unsigned int mark : 1;
unsigned int debug : 1;
unsigned int trace : 1;
unsigned int spare : 1;
unsigned int gcgen : 1;
unsigned int gccls : 3;
};
struct primsxp_struct {
int offset;
};
struct symsxp_struct {
struct SEXPREC *pname;
struct SEXPREC *value;
struct SEXPREC *internal;
};
struct listsxp_struct {
struct SEXPREC *carval;
struct SEXPREC *cdrval;
struct SEXPREC *tagval;
};
struct envsxp_struct {
struct SEXPREC *frame;
struct SEXPREC *enclos;
struct SEXPREC *hashtab;
};
struct closxp_struct {
struct SEXPREC *formals;
struct SEXPREC *body;
struct SEXPREC *env;
};
struct promsxp_struct {
struct SEXPREC *value;
struct SEXPREC *expr;
struct SEXPREC *env;
};
typedef struct SEXPREC {
struct sxpinfo_struct sxpinfo;
struct SEXPREC* attrib;
struct SEXPREC* gengc_next_node, *gengc_prev_node;
union {
struct primsxp_struct primsxp;
struct symsxp_struct symsxp;
struct listsxp_struct listsxp;
struct envsxp_struct envsxp;
struct closxp_struct closxp;
struct promsxp_struct promsxp;
} u;
} SEXPREC, *SEXP;
struct vecsxp_struct {
int length;
int truelength;
};
typedef struct VECTOR_SEXPREC {
struct sxpinfo_struct sxpinfo;
struct SEXPREC* attrib;
struct SEXPREC* gengc_next_node, *gengc_prev_node;
struct vecsxp_struct vecsxp;
} VECTOR_SEXPREC, *VECSEXP;
typedef union { VECTOR_SEXPREC s; double align; } SEXPREC_ALIGN;
#define DATAPTR(x) (((SEXPREC_ALIGN *) (x)) + 1)
#define R_CHAR_PTR(x) ((char *) DATAPTR(x))
#define R_INT_PTR(x) ((int *) DATAPTR(x))
#define R_REAL_PTR(x) ((double *) DATAPTR(x))
#define R_AS_INT(x) (*((int *) DATAPTR(x)))
#define R_IS_NULL(x) ((*(SEXP)(x)).sxpinfo.type == 0)
// 64bit pointer
#if INTPTR_MAX == INT64_MAX
#define R_ADDR(x) ((int64_t *) DATAPTR(x))
inline void R_SET_PTR(SEXP x, void* ptr) {
if (ptr == nullptr) {
R_ADDR(x)[0] = (int64_t)(NULL);
} else {
R_ADDR(x)[0] = (int64_t)(ptr);
}
}
inline void* R_GET_PTR(SEXP x) {
if (R_IS_NULL(x)) {
return nullptr;
} else {
auto ret = (void *)(R_ADDR(x)[0]);
if (ret == NULL) {
ret = nullptr;
}
return ret;
}
}
#else
#define R_ADDR(x) ((int32_t *) DATAPTR(x))
inline void R_SET_PTR(SEXP x, void* ptr) {
if (ptr == nullptr) {
R_ADDR(x)[0] = (int32_t)(NULL);
} else {
R_ADDR(x)[0] = (int32_t)(ptr);
}
}
inline void* R_GET_PTR(SEXP x) {
if (R_IS_NULL(x)) {
return nullptr;
} else {
auto ret = (void *)(R_ADDR(x)[0]);
if (ret == NULL) {
ret = nullptr;
}
return ret;
}
}
#endif
#endif // R_OBJECT_HELPER_H_
// application
#include "../../src/application/application.cpp"
// boosting
#include "../../src/boosting/boosting.cpp"
#include "../../src/boosting/gbdt.cpp"
// io
#include "../../src/io/bin.cpp"
#include "../../src/io/config.cpp"
#include "../../src/io/dataset.cpp"
#include "../../src/io/dataset_loader.cpp"
#include "../../src/io/metadata.cpp"
#include "../../src/io/parser.cpp"
#include "../../src/io/tree.cpp"
// metric
#include "../../src/metric/dcg_calculator.cpp"
#include "../../src/metric/metric.cpp"
// network
#include "../../src/network/linker_topo.cpp"
#include "../../src/network/linkers_socket.cpp"
#include "../../src/network/network.cpp"
// objective
#include "../../src/objective/objective_function.cpp"
// treelearner
#include "../../src/treelearner/data_parallel_tree_learner.cpp"
#include "../../src/treelearner/feature_parallel_tree_learner.cpp"
#include "../../src/treelearner/serial_tree_learner.cpp"
#include "../../src/treelearner/tree_learner.cpp"
#include "../../src/treelearner/voting_parallel_tree_learner.cpp"
// c_api
#include "../../src/c_api.cpp"
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include <omp.h>
#include <cstdint>
#include <memory>
#include <LightGBM/utils/text_reader.h>
#include <LightGBM/utils/common.h>
#include "./lightgbm_R.h"
#define COL_MAJOR (0)
#define R_API_BEGIN() \
try {
#define R_API_END() } \
catch(std::exception& ex) { R_INT_PTR(call_state)[0] = -1; LGBM_SetLastError(ex.what()); return call_state;} \
catch(std::string& ex) { R_INT_PTR(call_state)[0] = -1; LGBM_SetLastError(ex.c_str()); return call_state; } \
catch(...) { R_INT_PTR(call_state)[0] = -1; LGBM_SetLastError("unknown exception"); return call_state;} \
return call_state;
#define CHECK_CALL(x) \
if ((x) != 0) { \
R_INT_PTR(call_state)[0] = -1; \
return call_state; \
}
using namespace LightGBM;
SEXP EncodeChar(SEXP dest, const char* src, SEXP buf_len, SEXP actual_len) {
int str_len = static_cast<int>(std::strlen(src));
R_INT_PTR(actual_len)[0] = str_len;
if (R_AS_INT(buf_len) < str_len) { return dest; }
auto ptr = R_CHAR_PTR(dest);
int i = 0;
while (src[i] != '\0') {
ptr[i] = src[i];
++i;
}
return dest;
}
SEXP LGBM_GetLastError_R(SEXP buf_len, SEXP actual_len, SEXP err_msg) {
return EncodeChar(err_msg, LGBM_GetLastError(), buf_len, actual_len);
}
SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
DatasetHandle handle;
CHECK_CALL(LGBM_DatasetCreateFromFile(R_CHAR_PTR(filename), R_CHAR_PTR(parameters),
R_GET_PTR(reference), &handle));
R_SET_PTR(out, handle);
R_API_END();
}
SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data,
SEXP num_indptr,
SEXP nelem,
SEXP num_row,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
const int* p_indptr = R_INT_PTR(indptr);
const int* p_indices = R_INT_PTR(indices);
const double* p_data = R_REAL_PTR(data);
int64_t nindptr = static_cast<int64_t>(R_AS_INT(num_indptr));
int64_t ndata = static_cast<int64_t>(R_AS_INT(nelem));
int64_t nrow = static_cast<int64_t>(R_AS_INT(num_row));
DatasetHandle handle;
CHECK_CALL(LGBM_DatasetCreateFromCSC(p_indptr, C_API_DTYPE_INT32, p_indices,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, R_CHAR_PTR(parameters), R_GET_PTR(reference), &handle));
R_SET_PTR(out, handle);
R_API_END();
}
SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
SEXP num_row,
SEXP num_col,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
int32_t nrow = static_cast<int32_t>(R_AS_INT(num_row));
int32_t ncol = static_cast<int32_t>(R_AS_INT(num_col));
double* p_mat = R_REAL_PTR(data);
DatasetHandle handle;
CHECK_CALL(LGBM_DatasetCreateFromMat(p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
R_CHAR_PTR(parameters), R_GET_PTR(reference), &handle));
R_SET_PTR(out, handle);
R_API_END();
}
SEXP LGBM_DatasetGetSubset_R(SEXP handle,
SEXP used_row_indices,
SEXP len_used_row_indices,
SEXP parameters,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
int len = R_AS_INT(len_used_row_indices);
std::vector<int> idxvec(len);
// convert from one-based to zero-based index
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
idxvec[i] = R_INT_PTR(used_row_indices)[i] - 1;
}
DatasetHandle res;
CHECK_CALL(LGBM_DatasetGetSubset(R_GET_PTR(handle),
idxvec.data(), len, R_CHAR_PTR(parameters),
&res));
R_SET_PTR(out, res);
R_API_END();
}
SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
SEXP feature_names,
SEXP call_state) {
R_API_BEGIN();
auto vec_names = Common::Split(R_CHAR_PTR(feature_names), "\t");
std::vector<const char*> vec_sptr;
int len = static_cast<int>(vec_names.size());
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
CHECK_CALL(LGBM_DatasetSetFeatureNames(R_GET_PTR(handle),
vec_sptr.data(), len));
R_API_END();
}
SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle,
SEXP buf_len,
SEXP actual_len,
SEXP feature_names,
SEXP call_state) {
R_API_BEGIN();
int len = 0;
CHECK_CALL(LGBM_DatasetGetNumFeature(R_GET_PTR(handle), &len));
std::vector<std::vector<char>> names(len);
std::vector<char*> ptr_names(len);
for (int i = 0; i < len; ++i) {
names[i].resize(256);
ptr_names[i] = names[i].data();
}
int out_len;
CHECK_CALL(LGBM_DatasetGetFeatureNames(R_GET_PTR(handle),
ptr_names.data(), &out_len));
CHECK(len == out_len);
auto merge_str = Common::Join<char*>(ptr_names, "\t");
EncodeChar(feature_names, merge_str.c_str(), buf_len, actual_len);
R_API_END();
}
SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
SEXP filename,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_DatasetSaveBinary(R_GET_PTR(handle),
R_CHAR_PTR(filename)));
R_API_END();
}
SEXP LGBM_DatasetFree_R(SEXP handle,
SEXP call_state) {
R_API_BEGIN();
if (R_GET_PTR(handle) != nullptr) {
CHECK_CALL(LGBM_DatasetFree(R_GET_PTR(handle)));
R_SET_PTR(handle, nullptr);
}
R_API_END();
}
SEXP LGBM_DatasetSetField_R(SEXP handle,
SEXP field_name,
SEXP field_data,
SEXP num_element,
SEXP call_state) {
R_API_BEGIN();
int len = static_cast<int>(R_AS_INT(num_element));
const char* name = R_CHAR_PTR(field_name);
if (!strcmp("group", name) || !strcmp("query", name)) {
std::vector<int32_t> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<int32_t>(R_INT_PTR(field_data)[i]);
}
CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, vec.data(), len, C_API_DTYPE_INT32));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<float>(R_REAL_PTR(field_data)[i]);
}
CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, vec.data(), len, C_API_DTYPE_FLOAT32));
}
R_API_END();
}
SEXP LGBM_DatasetGetField_R(SEXP handle,
SEXP field_name,
SEXP field_data,
SEXP call_state) {
R_API_BEGIN();
const char* name = R_CHAR_PTR(field_name);
int out_len = 0;
int out_type = 0;
const void* res;
CHECK_CALL(LGBM_DatasetGetField(R_GET_PTR(handle), name, &out_len, &res, &out_type));
if (!strcmp("group", name) || !strcmp("query", name)) {
auto p_data = reinterpret_cast<const int32_t*>(res);
// convert from boundaries to size
#pragma omp parallel for schedule(static)
for (int i = 0; i < out_len - 1; ++i) {
R_INT_PTR(field_data)[i] = p_data[i + 1] - p_data[i];
}
} else {
auto p_data = reinterpret_cast<const float*>(res);
#pragma omp parallel for schedule(static)
for (int i = 0; i < out_len; ++i) {
R_REAL_PTR(field_data)[i] = p_data[i];
}
}
R_API_END();
}
SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP field_name,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
const char* name = R_CHAR_PTR(field_name);
int out_len = 0;
int out_type = 0;
const void* res;
CHECK_CALL(LGBM_DatasetGetField(R_GET_PTR(handle), name, &out_len, &res, &out_type));
if (!strcmp("group", name) || !strcmp("query", name)) {
out_len -= 1;
}
R_INT_PTR(out)[0] = static_cast<int>(out_len);
R_API_END();
}
SEXP LGBM_DatasetGetNumData_R(SEXP handle, SEXP out,
SEXP call_state) {
int nrow;
R_API_BEGIN();
CHECK_CALL(LGBM_DatasetGetNumData(R_GET_PTR(handle), &nrow));
R_INT_PTR(out)[0] = static_cast<int>(nrow);
R_API_END();
}
SEXP LGBM_DatasetGetNumFeature_R(SEXP handle,
SEXP out,
SEXP call_state) {
int nfeature;
R_API_BEGIN();
CHECK_CALL(LGBM_DatasetGetNumFeature(R_GET_PTR(handle), &nfeature));
R_INT_PTR(out)[0] = static_cast<int>(nfeature);
R_API_END();
}
// --- start Booster interfaces
SEXP LGBM_BoosterFree_R(SEXP handle,
SEXP call_state) {
R_API_BEGIN();
if (R_GET_PTR(handle) != nullptr) {
CHECK_CALL(LGBM_BoosterFree(R_GET_PTR(handle)));
R_SET_PTR(handle, nullptr);
}
R_API_END();
}
SEXP LGBM_BoosterCreate_R(SEXP train_data,
SEXP parameters,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
BoosterHandle handle;
CHECK_CALL(LGBM_BoosterCreate(R_GET_PTR(train_data), R_CHAR_PTR(parameters), &handle));
R_SET_PTR(out, handle);
R_API_END();
}
SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
int out_num_iterations = 0;
BoosterHandle handle;
CHECK_CALL(LGBM_BoosterCreateFromModelfile(R_CHAR_PTR(filename), &out_num_iterations, &handle));
R_SET_PTR(out, handle);
R_API_END();
}
SEXP LGBM_BoosterMerge_R(SEXP handle,
SEXP other_handle,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterMerge(R_GET_PTR(handle), R_GET_PTR(other_handle)));
R_API_END();
}
SEXP LGBM_BoosterAddValidData_R(SEXP handle,
SEXP valid_data,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterAddValidData(R_GET_PTR(handle), R_GET_PTR(valid_data)));
R_API_END();
}
SEXP LGBM_BoosterResetTrainingData_R(SEXP handle,
SEXP train_data,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterResetTrainingData(R_GET_PTR(handle), R_GET_PTR(train_data)));
R_API_END();
}
SEXP LGBM_BoosterResetParameter_R(SEXP handle,
SEXP parameters,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterResetParameter(R_GET_PTR(handle), R_CHAR_PTR(parameters)));
R_API_END();
}
SEXP LGBM_BoosterGetNumClasses_R(SEXP handle,
SEXP out,
SEXP call_state) {
int num_class;
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterGetNumClasses(R_GET_PTR(handle), &num_class));
R_INT_PTR(out)[0] = static_cast<int>(num_class);
R_API_END();
}
SEXP LGBM_BoosterUpdateOneIter_R(SEXP handle,
SEXP call_state) {
int is_finished = 0;
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterUpdateOneIter(R_GET_PTR(handle), &is_finished));
R_API_END();
}
SEXP LGBM_BoosterUpdateOneIterCustom_R(SEXP handle,
SEXP grad,
SEXP hess,
SEXP len,
SEXP call_state) {
int is_finished = 0;
R_API_BEGIN();
int int_len = R_AS_INT(len);
std::vector<float> tgrad(int_len), thess(int_len);
#pragma omp parallel for schedule(static)
for (int j = 0; j < int_len; ++j) {
tgrad[j] = static_cast<float>(R_REAL_PTR(grad)[j]);
thess[j] = static_cast<float>(R_REAL_PTR(hess)[j]);
}
CHECK_CALL(LGBM_BoosterUpdateOneIterCustom(R_GET_PTR(handle), tgrad.data(), thess.data(), &is_finished));
R_API_END();
}
SEXP LGBM_BoosterRollbackOneIter_R(SEXP handle,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterRollbackOneIter(R_GET_PTR(handle)));
R_API_END();
}
SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle,
SEXP out,
SEXP call_state) {
int out_iteration;
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterGetCurrentIteration(R_GET_PTR(handle), &out_iteration));
R_INT_PTR(out)[0] = static_cast<int>(out_iteration);
R_API_END();
}
SEXP LGBM_BoosterGetEvalNames_R(SEXP handle,
SEXP buf_len,
SEXP actual_len,
SEXP eval_names,
SEXP call_state) {
R_API_BEGIN();
int len;
CHECK_CALL(LGBM_BoosterGetEvalCounts(R_GET_PTR(handle), &len));
std::vector<std::vector<char>> names(len);
std::vector<char*> ptr_names(len);
for (int i = 0; i < len; ++i) {
names[i].resize(128);
ptr_names[i] = names[i].data();
}
int out_len;
CHECK_CALL(LGBM_BoosterGetEvalNames(R_GET_PTR(handle), &out_len, ptr_names.data()));
CHECK(out_len == len);
auto merge_names = Common::Join<char*>(ptr_names, "\t");
EncodeChar(eval_names, merge_names.c_str(), buf_len, actual_len);
R_API_END();
}
SEXP LGBM_BoosterGetEval_R(SEXP handle,
SEXP data_idx,
SEXP out_result,
SEXP call_state) {
R_API_BEGIN();
int len;
CHECK_CALL(LGBM_BoosterGetEvalCounts(R_GET_PTR(handle), &len));
double* ptr_ret = R_REAL_PTR(out_result);
int out_len;
CHECK_CALL(LGBM_BoosterGetEval(R_GET_PTR(handle), R_AS_INT(data_idx), &out_len, ptr_ret));
CHECK(out_len == len);
R_API_END();
}
SEXP LGBM_BoosterGetNumPredict_R(SEXP handle,
SEXP data_idx,
SEXP out,
SEXP call_state) {
R_API_BEGIN();
int64_t len;
CHECK_CALL(LGBM_BoosterGetNumPredict(R_GET_PTR(handle), R_AS_INT(data_idx), &len));
R_INT_PTR(out)[0] = static_cast<int>(len);
R_API_END();
}
SEXP LGBM_BoosterGetPredict_R(SEXP handle,
SEXP data_idx,
SEXP out_result,
SEXP call_state) {
R_API_BEGIN();
double* ptr_ret = R_REAL_PTR(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterGetPredict(R_GET_PTR(handle), R_AS_INT(data_idx), &out_len, ptr_ret));
R_API_END();
}
int GetPredictType(SEXP is_rawscore, SEXP is_leafidx) {
int pred_type = C_API_PREDICT_NORMAL;
if (R_AS_INT(is_rawscore)) {
pred_type = C_API_PREDICT_RAW_SCORE;
}
if (R_AS_INT(is_leafidx)) {
pred_type = C_API_PREDICT_LEAF_INDEX;
}
return pred_type;
}
SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
SEXP data_filename,
SEXP data_has_header,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP result_filename,
SEXP call_state) {
R_API_BEGIN();
int pred_type = GetPredictType(is_rawscore, is_leafidx);
CHECK_CALL(LGBM_BoosterPredictForFile(R_GET_PTR(handle), R_CHAR_PTR(data_filename),
R_AS_INT(data_has_header), pred_type, R_AS_INT(num_iteration),
R_CHAR_PTR(result_filename)));
R_API_END();
}
SEXP LGBM_BoosterCalcNumPredict_R(SEXP handle,
SEXP num_row,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_len,
SEXP call_state) {
R_API_BEGIN();
int pred_type = GetPredictType(is_rawscore, is_leafidx);
int64_t len = 0;
CHECK_CALL(LGBM_BoosterCalcNumPredict(R_GET_PTR(handle), R_AS_INT(num_row),
pred_type, R_AS_INT(num_iteration), &len));
R_INT_PTR(out_len)[0] = static_cast<int>(len);
R_API_END();
}
SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
SEXP indptr,
SEXP indices,
SEXP data,
SEXP num_indptr,
SEXP nelem,
SEXP num_row,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_result,
SEXP call_state) {
R_API_BEGIN();
int pred_type = GetPredictType(is_rawscore, is_leafidx);
const int* p_indptr = R_INT_PTR(indptr);
const int* p_indices = R_INT_PTR(indices);
const double* p_data = R_REAL_PTR(data);
int64_t nindptr = R_AS_INT(num_indptr);
int64_t ndata = R_AS_INT(nelem);
int64_t nrow = R_AS_INT(num_row);
double* ptr_ret = R_REAL_PTR(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSC(R_GET_PTR(handle),
p_indptr, C_API_DTYPE_INT32, p_indices,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, pred_type, R_AS_INT(num_iteration), &out_len, ptr_ret));
R_API_END();
}
SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
SEXP data,
SEXP num_row,
SEXP num_col,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_result,
SEXP call_state) {
R_API_BEGIN();
int pred_type = GetPredictType(is_rawscore, is_leafidx);
int32_t nrow = R_AS_INT(num_row);
int32_t ncol = R_AS_INT(num_col);
double* p_mat = R_REAL_PTR(data);
double* ptr_ret = R_REAL_PTR(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMat(R_GET_PTR(handle),
p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
pred_type, R_AS_INT(num_iteration), &out_len, ptr_ret));
R_API_END();
}
SEXP LGBM_BoosterSaveModel_R(SEXP handle,
SEXP num_iteration,
SEXP filename,
SEXP call_state) {
R_API_BEGIN();
CHECK_CALL(LGBM_BoosterSaveModel(R_GET_PTR(handle), R_AS_INT(num_iteration), R_CHAR_PTR(filename)));
R_API_END();
}
SEXP LGBM_BoosterDumpModel_R(SEXP handle,
SEXP num_iteration,
SEXP buffer_len,
SEXP actual_len,
SEXP out_str,
SEXP call_state) {
R_API_BEGIN();
int out_len = 0;
std::vector<char> inner_char_buf(R_AS_INT(buffer_len));
CHECK_CALL(LGBM_BoosterDumpModel(R_GET_PTR(handle), R_AS_INT(num_iteration), R_AS_INT(buffer_len), &out_len, inner_char_buf.data()));
EncodeChar(out_str, inner_char_buf.data(), buffer_len, actual_len);
if (out_len < R_AS_INT(buffer_len)) {
EncodeChar(out_str, inner_char_buf.data(), buffer_len, actual_len);
} else {
R_INT_PTR(actual_len)[0] = static_cast<int>(out_len);
}
R_API_END();
}
#ifndef LIGHTGBM_R_H_
#define LIGHTGBM_R_H_
#include <LightGBM/utils/log.h>
#include <cstdint>
#include <LightGBM/c_api.h>
#include "R_object_helper.h"
/*!
* \brief get string message of the last error
* all function in this file will return 0 when succeed
* and -1 when an error occured,
* \return err_msg error inforomation
* \return error inforomation
*/
DllExport SEXP LGBM_GetLastError_R(SEXP buf_len, SEXP actual_len, SEXP err_msg);
// --- start Dataset interface
/*!
* \brief load data set from file like the command_line LightGBM do
* \param filename the name of the file
* \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means don't used
* \param out created dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state);
/*!
* \brief create a dataset from CSC format
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of cols in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows
* \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means don't used
* \param out created dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data,
SEXP nindptr,
SEXP nelem,
SEXP num_row,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state);
/*!
* \brief create dataset from dense matrix
* \param data matric data
* \param nrow number of rows
* \param ncol number columns
* \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means don't used
* \param out created dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
SEXP nrow,
SEXP ncol,
SEXP parameters,
SEXP reference,
SEXP out,
SEXP call_state);
/*!
* \brief Create subset of a data
* \param handle handle of full dataset
* \param used_row_indices Indices used in subset
* \param len_used_row_indices length of Indices used in subset
* \param parameters additional parameters
* \param out created dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetSubset_R(SEXP handle,
SEXP used_row_indices,
SEXP len_used_row_indices,
SEXP parameters,
SEXP out,
SEXP call_state);
/*!
* \brief save feature names to Dataset
* \param handle handle
* \param feature_names feature names
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
SEXP feature_names,
SEXP call_state);
/*!
* \brief save feature names to Dataset
* \param handle handle
* \param feature_names feature names
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle,
SEXP buf_len,
SEXP actual_len,
SEXP feature_names,
SEXP call_state);
/*!
* \brief save dateset to binary file
* \param handle a instance of dataset
* \param filename file name
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
SEXP filename,
SEXP call_state);
/*!
* \brief free dataset
* \param handle a instance of dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetFree_R(SEXP handle,
SEXP call_state);
/*!
* \brief set vector to a content in info
* Note: group and group only work for C_API_DTYPE_INT32
* label and weight only work for C_API_DTYPE_FLOAT32
* \param handle a instance of dataset
* \param field_name field name, can be label, weight, group, group_id
* \param field_data pointer to vector
* \param num_element number of element in field_data
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetSetField_R(SEXP handle,
SEXP field_name,
SEXP field_data,
SEXP num_element,
SEXP call_state);
/*!
* \brief get size of info vector from dataset
* \param handle a instance of dataset
* \param field_name field name
* \param out size of info vector from dataset
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP field_name,
SEXP out,
SEXP call_state);
/*!
* \brief get info vector from dataset
* \param handle a instance of dataset
* \param field_name field name
* \param field_data pointer to vector
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetField_R(SEXP handle,
SEXP field_name,
SEXP field_data,
SEXP call_state);
/*!
* \brief get number of data.
* \param handle the handle to the dataset
* \param out The address to hold number of data
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetNumData_R(SEXP handle,
SEXP out,
SEXP call_state);
/*!
* \brief get number of features
* \param handle the handle to the dataset
* \param out The output of number of features
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_DatasetGetNumFeature_R(SEXP handle,
SEXP out,
SEXP call_state);
// --- start Booster interfaces
/*!
* \brief create an new boosting learner
* \param train_data training data set
* \param parameters format: 'key1=value1 key2=value2'
* \prama out handle of created Booster
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterCreate_R(SEXP train_data,
SEXP parameters,
SEXP out,
SEXP call_state);
/*!
* \brief free obj in handle
* \param handle handle to be freed
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterFree_R(SEXP handle,
SEXP call_state);
/*!
* \brief load an existing boosting from model file
* \param filename filename of model
* \prama out handle of created Booster
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename,
SEXP out,
SEXP call_state);
/*!
* \brief Merge model in two booster to first handle
* \param handle handle, will merge other handle to this
* \param other_handle
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterMerge_R(SEXP handle,
SEXP other_handle,
SEXP call_state);
/*!
* \brief Add new validation to booster
* \param handle handle
* \param valid_data validation data set
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterAddValidData_R(SEXP handle,
SEXP valid_data,
SEXP call_state);
/*!
* \brief Reset training data for booster
* \param handle handle
* \param train_data training data set
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterResetTrainingData_R(SEXP handle,
SEXP train_data,
SEXP call_state);
/*!
* \brief Reset config for current booster
* \param handle handle
* \param parameters format: 'key1=value1 key2=value2'
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterResetParameter_R(SEXP handle,
SEXP parameters,
SEXP call_state);
/*!
* \brief Get number of class
* \param handle handle
* \param out number of classes
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetNumClasses_R(SEXP handle,
SEXP out,
SEXP call_state);
/*!
* \brief update the model in one round
* \param handle handle
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterUpdateOneIter_R(SEXP handle,
SEXP call_state);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to support customized loss function
* \param handle handle
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterUpdateOneIterCustom_R(SEXP handle,
SEXP grad,
SEXP hess,
SEXP len,
SEXP call_state);
/*!
* \brief Rollback one iteration
* \param handle handle
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterRollbackOneIter_R(SEXP handle,
SEXP call_state);
/*!
* \brief Get iteration of current boosting rounds
* \param out iteration of boosting rounds
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle,
SEXP out,
SEXP call_state);
/*!
* \brief Get Name of eval
* \param eval_names eval names
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetEvalNames_R(SEXP handle,
SEXP buf_len,
SEXP actual_len,
SEXP eval_names,
SEXP call_state);
/*!
* \brief get evaluation for training data and validation data
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_result float arrary contains result
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetEval_R(SEXP handle,
SEXP data_idx,
SEXP out_result,
SEXP call_state);
/*!
* \brief Get number of prediction for training data and validation data
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out size of predict
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetNumPredict_R(SEXP handle,
SEXP data_idx,
SEXP out,
SEXP call_state);
/*!
* \brief Get prediction for training data and validation data
this can be used to support customized eval function
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_result, used to store predict result, should pre-allocate memory
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterGetPredict_R(SEXP handle,
SEXP data_idx,
SEXP out_result,
SEXP call_state);
/*!
* \brief make prediction for file
* \param handle handle
* \param data_filename filename of data file
* \param data_has_header data file has header or not
* \param is_rawscore
* \param is_leafidx
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \return 0 when succeed, -1 when failure happens
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
SEXP data_filename,
SEXP data_has_header,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP result_filename,
SEXP call_state);
/*!
* \brief Get number of prediction
* \param handle handle
* \param num_row
* \param is_rawscore
* \param is_leafidx
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len lenght of prediction
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterCalcNumPredict_R(SEXP handle,
SEXP num_row,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_len,
SEXP call_state);
/*!
* \brief make prediction for an new data set
* Note: should pre-allocate memory for out_result,
* for noraml and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of cols in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows
* \param is_rawscore
* \param is_leafidx
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out prediction result
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
SEXP indptr,
SEXP indices,
SEXP data,
SEXP nindptr,
SEXP nelem,
SEXP num_row,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_result,
SEXP call_state);
/*!
* \brief make prediction for an new data set
* Note: should pre-allocate memory for out_result,
* for noraml and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param is_rawscore
* \param is_leafidx
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out prediction result
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
SEXP data,
SEXP nrow,
SEXP ncol,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP num_iteration,
SEXP out_result,
SEXP call_state);
/*!
* \brief save model into file
* \param handle handle
* \param num_iteration, <= 0 means save all
* \param filename file name
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterSaveModel_R(SEXP handle,
SEXP num_iteration,
SEXP filename,
SEXP call_state);
/*!
* \brief dump model to json
* \param handle handle
* \param num_iteration, <= 0 means save all
* \param out_str json format string of model
* \return 0 when succeed, -1 when failure happens
*/
DllExport SEXP LGBM_BoosterDumpModel_R(SEXP handle,
SEXP num_iteration,
SEXP buffer_len,
SEXP actual_len,
SEXP out_str,
SEXP call_state);
#endif // LIGHTGBM_R_H_
\ No newline at end of file
library(testthat)
library(lightgbm)
test_check("lightgbm")
require(lightgbm)
context("basic functions")
data(agaricus.train, package='lightgbm')
data(agaricus.test, package='lightgbm')
train <- agaricus.train
test <- agaricus.test
windows_flag = grepl('Windows', Sys.info()[['sysname']])
test_that("train and predict binary classification", {
nrounds = 10
bst <- lightgbm(data = train$data, label = train$label, num_leaves = 5,
nrounds = nrounds, objective = "binary", metric="binary_error")
expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "binary_error")
expect_lt(min(record_results), 0.02)
pred <- predict(bst, test$data)
expect_length(pred, 1611)
pred1 <- predict(bst, train$data, num_iteration = 1)
expect_length(pred1, 6513)
err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
err_log <- record_results[1]
expect_lt(abs(err_pred1 - err_log), 10e-6)
})
test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1
bst <- lightgbm(data = as.matrix(iris[, -5]), label = lb,
num_leaves = 4, learning_rate = 0.1, nrounds = 20, min_data=20, min_hess=20,
objective = "multiclass", metric="multi_error", num_class=3)
expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "multi_error")
expect_lt(min(record_results), 0.03)
pred <- predict(bst, as.matrix(iris[, -5]))
expect_length(pred, nrow(iris) * 3)
})
test_that("use of multiple eval metrics works", {
bst <- lightgbm(data = train$data, label = train$label, num_leaves = 4,
learning_rate=1, nrounds = 10, objective = "binary",
metric = list("binary_error","auc","binary_logloss") )
expect_false(is.null(bst$record_evals))
})
test_that("training continuation works", {
dtrain <- lgb.Dataset(train$data, label = train$label, free_raw_data=FALSE)
watchlist = list(train=dtrain)
param <- list(objective = "binary", metric="binary_logloss", num_leaves = 5, learning_rate = 1)
# for the reference, use 10 iterations at once:
bst <- lgb.train(param, dtrain, nrounds = 10, watchlist)
err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10)
# first 5 iterations:
bst1 <- lgb.train(param, dtrain, nrounds = 5, watchlist)
# test continuing from a model in file
lgb.save(bst1, "lightgbm.model")
# continue for 5 more:
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = bst1)
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10)
expect_lt(abs(err_bst - err_bst2), 0.01)
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = "lightgbm.model")
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10)
expect_lt(abs(err_bst - err_bst2), 0.01)
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment