Commit 551d59ca authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

R package (#168)

* finish R's c_api

* clean code

* fix sizeof pointer in 32bit system.

* add predictor class

* add Dataset class

* format code

* add booster

* add type check for expose function

* add a simple callback

* add all callbacks

* finish the basic training logic

* update docs

* add a simple training interface

* add basic test

* adapt the changes in c_api

* add test for Dataset

* add test for custom obj/eval functions

* fix python test

* fix bug in metadata init

* fix R CMD check
parent acbd4f34
# Shared fixtures for the custom objective / custom metric test.
context('Test models with custom objective')
# library() stops right here when the package is missing; require() would
# only return FALSE and let a later line fail with an unrelated error.
library(lightgbm)
data(agaricus.train, package = 'lightgbm')
data(agaricus.test, package = 'lightgbm')
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
# Named datasets handed to lgb.train() in the test.
watchlist <- list(eval = dtest, train = dtrain)
# Custom binary logistic objective.
#
# preds:  numeric vector of raw scores.
# dtrain: dataset object; its labels are read with getinfo(dtrain, "label").
#
# Returns a list with two elements:
#   grad - sigmoid(preds) minus the labels
#   hess - sigmoid(preds) * (1 - sigmoid(preds))
logregobj <- function(preds, dtrain) {
  y <- getinfo(dtrain, "label")
  prob <- 1 / (1 + exp(-preds))
  list(
    grad = prob - y,
    hess = prob * (1 - prob)
  )
}
# Custom evaluation metric: fraction of misclassified rows.
#
# preds:  numeric vector of scores; a score above 0 counts as a positive
#         prediction.
# dtrain: dataset object; its labels are read with getinfo(dtrain, "label").
#
# Returns a list with the metric name ("error"), its value, and
# higher_better = FALSE.
evalerror <- function(preds, dtrain) {
  y <- getinfo(dtrain, "label")
  predicted_positive <- preds > 0
  n_wrong <- sum(y != predicted_positive)
  list(
    name = "error",
    value = as.numeric(n_wrong) / length(y),
    higher_better = FALSE
  )
}
# Training configuration: the custom objective function itself is supplied
# through `objective`, and "auc" is requested as the metric.
param <- list(
  num_leaves = 8,
  learning_rate = 1,
  objective = logregobj,
  metric = "auc"
)
num_round <- 10

test_that("custom objective works", {
  bst <- lgb.train(param, dtrain, num_round, watchlist, eval = evalerror)
  # The trained booster must expose an evaluation record.
  expect_false(is.null(bst$record_evals))
})
# library() fails fast when a package is missing; require() would only return
# FALSE and let the tests break later with a less helpful message.
library(lightgbm)
library(Matrix)
context("testing lgb.Dataset functionality")
data(agaricus.test, package = 'lightgbm')
# Shared fixtures: the first 100 rows of the test split and their labels.
test_data <- agaricus.test$data[1:100, ]
test_label <- agaricus.test$label[1:100]
test_that("lgb.Dataset: basic construction, saving, loading", {
  # Build the same dataset from the sparse matrix and from its dense copy;
  # both must report identical labels.
  ds_sparse <- lgb.Dataset(test_data, label = test_label)
  ds_dense <- lgb.Dataset(as.matrix(test_data), label = test_label)
  expect_equal(getinfo(ds_sparse, 'label'), getinfo(ds_dense, 'label'))

  # Round trip: save to a temporary file, load and construct from that file,
  # then remove the file before comparing labels.
  saved_path <- tempfile('lgb.Dataset_')
  lgb.Dataset.save(ds_sparse, saved_path)
  ds_loaded <- lgb.Dataset(saved_path)
  lgb.Dataset.construct(ds_loaded)
  unlink(saved_path)
  expect_equal(getinfo(ds_sparse, 'label'), getinfo(ds_loaded, 'label'))
})
test_that("lgb.Dataset: getinfo & setinfo", {
  dtest <- lgb.Dataset(test_data)
  setinfo(dtest, 'label', test_label)
  # Read the label back once and compare it with what was set.
  labels <- getinfo(dtest, 'label')
  expect_equal(labels, test_label)
  # Fields that were never set come back empty.
  expect_true(length(getinfo(dtest, 'weight')) == 0)
  expect_true(length(getinfo(dtest, 'init_score')) == 0)
  # any other field name should error
  expect_error(setinfo(dtest, 'asdf', test_label))
})
test_that("lgb.Dataset: slice, dim", {
  # The constructed dataset reports the dimensions of its source matrix.
  full_ds <- lgb.Dataset(test_data, label = test_label)
  lgb.Dataset.construct(full_ds)
  expect_equal(dim(full_ds), dim(test_data))

  # A slice of the first 42 rows keeps every column.
  head_ds <- slice(full_ds, 1:42)
  lgb.Dataset.construct(head_ds)
  expect_equal(nrow(head_ds), 42)
  expect_equal(ncol(head_ds), ncol(test_data))
})
test_that("lgb.Dataset: colnames", {
  dtest <- lgb.Dataset(test_data, label = test_label)
  # Column names are checked both before and after construction.
  expect_equal(colnames(dtest), colnames(test_data))
  lgb.Dataset.construct(dtest)
  expect_equal(colnames(dtest), colnames(test_data))
  # Assigning a single name is expected to fail.
  expect_error(colnames(dtest) <- 'asdf')
  # seq_len() stays correct even for zero columns, unlike 1:ncol(...).
  new_names <- make.names(seq_len(ncol(test_data)))
  expect_silent(colnames(dtest) <- new_names)
  expect_equal(colnames(dtest), new_names)
})
test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
  # rsparsematrix() places its non-zero entries at random; fix the seed so
  # every run builds the same matrix and any failure is reproducible.
  set.seed(123)
  nr <- 1000
  x <- rsparsematrix(nr, 100, density = 0.0005)
  # we want it very sparse, so that last rows are empty
  # NOTE(review): if x@i holds zero-based row indices this bound always holds
  # and does not prove the last rows are empty - confirm whether `nr - 1`
  # was intended.
  expect_lt(max(x@i), nr)
  dtest <- lgb.Dataset(x)
  expect_equal(dim(dtest), dim(x))
})
......@@ -559,7 +559,7 @@ void GBDT::LoadModelFromString(const std::string& model_str) {
// get feature names
line = Common::FindFromLines(lines, "feature_names=");
if (line.size() > 0) {
feature_names_ = Common::Split(Common::Split(line.c_str(), '=')[1].c_str(), ' ');
feature_names_ = Common::Split(line.substr(std::strlen("feature_names=")).c_str(), " ");
if (feature_names_.size() != static_cast<size_t>(max_feature_idx_ + 1)) {
Log::Fatal("Wrong size of feature_names");
return;
......
......@@ -8,6 +8,10 @@
namespace LightGBM {
/*! \brief Default constructor: sets num_weights_, num_init_score_, num_data_ and num_queries_ to zero. */
Metadata::Metadata() {
num_weights_ = 0;
num_init_score_ = 0;
num_data_ = 0;
num_queries_ = 0;
}
void Metadata::Init(const char * data_filename) {
......
......@@ -116,7 +116,7 @@ public:
}
inline static const char* Name() {
return "logloss";
return "binary_logloss";
}
};
/*!
......@@ -135,7 +135,7 @@ public:
}
inline static const char* Name() {
return "error";
return "binary_error";
}
};
......
......@@ -59,7 +59,7 @@ class TestEngine(unittest.TestCase):
}
evals_result, ret = test_template(params, X_y, log_loss)
self.assertLess(ret, 0.15)
self.assertAlmostEqual(min(evals_result['eval']['logloss']), ret, places=5)
self.assertAlmostEqual(min(evals_result['eval']['binary_logloss']), ret, places=5)
def test_regreesion(self):
evals_result, ret = test_template()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment