"include/vscode:/vscode.git/clone" did not exist on "bbeecc09af946c5ff9b84d1ada4749a9f26bca31"
Unverified commit 1f1dc452, authored by Nikita Titov and committed by GitHub
Browse files

[tests][python] refined python tests (#2483)

* speed up tests

* more updates

* fixed pylint

* updated tests

* Update test_sklearn.py

* test that indices are sorted internally
parent 00d1e693
...@@ -69,7 +69,8 @@ def load_from_file(filename, reference): ...@@ -69,7 +69,8 @@ def load_from_file(filename, reference):
LIB.LGBM_DatasetCreateFromFile( LIB.LGBM_DatasetCreateFromFile(
c_str(filename), c_str(filename),
c_str('max_bin=15'), c_str('max_bin=15'),
ref, ctypes.byref(handle)) ref,
ctypes.byref(handle))
print(LIB.LGBM_GetLastError()) print(LIB.LGBM_GetLastError())
num_data = ctypes.c_long() num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
...@@ -88,8 +89,9 @@ def load_from_csr(filename, reference): ...@@ -88,8 +89,9 @@ def load_from_csr(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
csr = sparse.csr_matrix(mat) csr = sparse.csr_matrix(mat)
...@@ -124,8 +126,9 @@ def load_from_csc(filename, reference): ...@@ -124,8 +126,9 @@ def load_from_csc(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
csr = sparse.csc_matrix(mat) csr = sparse.csc_matrix(mat)
...@@ -160,8 +163,9 @@ def load_from_mat(filename, reference): ...@@ -160,8 +163,9 @@ def load_from_mat(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
data = np.array(mat.reshape(mat.size), copy=False) data = np.array(mat.reshape(mat.size), copy=False)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
...@@ -222,7 +226,7 @@ def test_booster(): ...@@ -222,7 +226,7 @@ def test_booster():
ctypes.byref(booster)) ctypes.byref(booster))
LIB.LGBM_BoosterAddValidData(booster, test) LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0) is_finished = ctypes.c_int(0)
for i in range(1, 101): for i in range(1, 51):
LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished)) LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
result = np.array([0.0], dtype=np.float64) result = np.array([0.0], dtype=np.float64)
out_len = ctypes.c_ulong(0) out_len = ctypes.c_ulong(0)
...@@ -260,7 +264,7 @@ def test_booster(): ...@@ -260,7 +264,7 @@ def test_booster():
mat.shape[1], mat.shape[1],
1, 1,
1, 1,
50, 25,
c_str(''), c_str(''),
ctypes.byref(num_preb), ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double))) preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
...@@ -270,7 +274,7 @@ def test_booster(): ...@@ -270,7 +274,7 @@ def test_booster():
'../../examples/binary_classification/binary.test')), '../../examples/binary_classification/binary.test')),
0, 0,
0, 0,
50, 25,
c_str(''), c_str(''),
c_str('preb.txt')) c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2) LIB.LGBM_BoosterFree(booster2)
...@@ -31,13 +31,13 @@ class TestBasic(unittest.TestCase): ...@@ -31,13 +31,13 @@ class TestBasic(unittest.TestCase):
bst = lgb.Booster(params, train_data) bst = lgb.Booster(params, train_data)
bst.add_valid(valid_data, "valid_1") bst.add_valid(valid_data, "valid_1")
for i in range(30): for i in range(20):
bst.update() bst.update()
if i % 10 == 0: if i % 10 == 0:
print(bst.eval_train(), bst.eval_valid()) print(bst.eval_train(), bst.eval_valid())
self.assertEqual(bst.current_iteration(), 30) self.assertEqual(bst.current_iteration(), 20)
self.assertEqual(bst.num_trees(), 30) self.assertEqual(bst.num_trees(), 20)
self.assertEqual(bst.num_model_per_iteration(), 1) self.assertEqual(bst.num_model_per_iteration(), 1)
bst.save_model("model.txt") bst.save_model("model.txt")
...@@ -48,26 +48,20 @@ class TestBasic(unittest.TestCase): ...@@ -48,26 +48,20 @@ class TestBasic(unittest.TestCase):
dump_svmlight_file(X_test, y_test, f) dump_svmlight_file(X_test, y_test, f)
pred_from_file = bst.predict(tname) pred_from_file = bst.predict(tname)
os.remove(tname) os.remove(tname)
self.assertEqual(len(pred_from_matr), len(pred_from_file)) np.testing.assert_allclose(pred_from_matr, pred_from_file)
for preds in zip(pred_from_matr, pred_from_file):
self.assertAlmostEqual(*preds, places=15)
# check saved model persistence # check saved model persistence
bst = lgb.Booster(params, model_file="model.txt") bst = lgb.Booster(params, model_file="model.txt")
os.remove("model.txt") os.remove("model.txt")
pred_from_model_file = bst.predict(X_test) pred_from_model_file = bst.predict(X_test)
self.assertEqual(len(pred_from_matr), len(pred_from_model_file))
for preds in zip(pred_from_matr, pred_from_model_file):
# we need to check the consistency of model file here, so test for exact equal # we need to check the consistency of model file here, so test for exact equal
self.assertEqual(*preds) np.testing.assert_array_equal(pred_from_matr, pred_from_model_file)
# check early stopping is working. Make it stop very early, so the scores should be very close to zero # check early stopping is working. Make it stop very early, so the scores should be very close to zero
pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5} pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
pred_early_stopping = bst.predict(X_test, **pred_parameter) pred_early_stopping = bst.predict(X_test, **pred_parameter)
self.assertEqual(len(pred_from_matr), len(pred_early_stopping))
for preds in zip(pred_early_stopping, pred_from_matr):
# scores likely to be different, but prediction should still be the same # scores likely to be different, but prediction should still be the same
self.assertEqual(preds[0] > 0, preds[1] > 0) np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping))
# test that shape is checked during prediction # test that shape is checked during prediction
bad_X_test = X_test[:, 1:] bad_X_test = X_test[:, 1:]
...@@ -97,7 +91,6 @@ class TestBasic(unittest.TestCase): ...@@ -97,7 +91,6 @@ class TestBasic(unittest.TestCase):
train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100}) train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100})
valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100}) valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100})
train_data.construct() train_data.construct()
valid_data.construct() valid_data.construct()
...@@ -108,23 +101,23 @@ class TestBasic(unittest.TestCase): ...@@ -108,23 +101,23 @@ class TestBasic(unittest.TestCase):
'../../examples/lambdarank/rank.train.query')) '../../examples/lambdarank/rank.train.query'))
lgb_train = lgb.Dataset(X_train, y_train, group=q_train) lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
self.assertEqual(len(lgb_train.get_group()), 201) self.assertEqual(len(lgb_train.get_group()), 201)
subset = lgb_train.subset(list(lgb.compat.range_(10))).construct() subset = lgb_train.subset(list(range(10))).construct()
subset_group = subset.get_group() subset_group = subset.get_group()
self.assertEqual(len(subset_group), 2) self.assertEqual(len(subset_group), 2)
self.assertEqual(subset_group[0], 1) self.assertEqual(subset_group[0], 1)
self.assertEqual(subset_group[1], 9) self.assertEqual(subset_group[1], 9)
def test_add_features_throws_if_num_data_unequal(self): def test_add_features_throws_if_num_data_unequal(self):
X1 = np.random.random((1000, 1)) X1 = np.random.random((100, 1))
X2 = np.random.random((100, 1)) X2 = np.random.random((10, 1))
d1 = lgb.Dataset(X1).construct() d1 = lgb.Dataset(X1).construct()
d2 = lgb.Dataset(X2).construct() d2 = lgb.Dataset(X2).construct()
with self.assertRaises(lgb.basic.LightGBMError): with self.assertRaises(lgb.basic.LightGBMError):
d1.add_features_from(d2) d1.add_features_from(d2)
def test_add_features_throws_if_datasets_unconstructed(self): def test_add_features_throws_if_datasets_unconstructed(self):
X1 = np.random.random((1000, 1)) X1 = np.random.random((100, 1))
X2 = np.random.random((1000, 1)) X2 = np.random.random((100, 1))
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
d1 = lgb.Dataset(X1) d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2) d2 = lgb.Dataset(X2)
...@@ -139,7 +132,8 @@ class TestBasic(unittest.TestCase): ...@@ -139,7 +132,8 @@ class TestBasic(unittest.TestCase):
d1.add_features_from(d2) d1.add_features_from(d2)
def test_add_features_equal_data_on_alternating_used_unused(self): def test_add_features_equal_data_on_alternating_used_unused(self):
X = np.random.random((1000, 5)) self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
...@@ -162,7 +156,8 @@ class TestBasic(unittest.TestCase): ...@@ -162,7 +156,8 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt) self.assertEqual(dtxt, d1txt)
def test_add_features_same_booster_behaviour(self): def test_add_features_same_booster_behaviour(self):
X = np.random.random((1000, 5)) self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
...@@ -170,7 +165,7 @@ class TestBasic(unittest.TestCase): ...@@ -170,7 +165,7 @@ class TestBasic(unittest.TestCase):
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
d1.add_features_from(d2) d1.add_features_from(d2)
d = lgb.Dataset(X, feature_name=names).construct() d = lgb.Dataset(X, feature_name=names).construct()
y = np.random.random(1000) y = np.random.random(100)
d1.set_label(y) d1.set_label(y)
d.set_label(y) d.set_label(y)
b1 = lgb.Booster(train_set=d1) b1 = lgb.Booster(train_set=d1)
...@@ -191,7 +186,7 @@ class TestBasic(unittest.TestCase): ...@@ -191,7 +186,7 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt) self.assertEqual(dtxt, d1txt)
def test_get_feature_penalty_and_monotone_constraints(self): def test_get_feature_penalty_and_monotone_constraints(self):
X = np.random.random((1000, 1)) X = np.random.random((100, 1))
d = lgb.Dataset(X, params={'feature_penalty': [0.5], d = lgb.Dataset(X, params={'feature_penalty': [0.5],
'monotone_constraints': [1]}).construct() 'monotone_constraints': [1]}).construct()
np.testing.assert_allclose(d.get_feature_penalty(), [0.5]) np.testing.assert_allclose(d.get_feature_penalty(), [0.5])
...@@ -201,7 +196,7 @@ class TestBasic(unittest.TestCase): ...@@ -201,7 +196,7 @@ class TestBasic(unittest.TestCase):
self.assertIsNone(d.get_monotone_constraints()) self.assertIsNone(d.get_monotone_constraints())
def test_add_features_feature_penalty(self): def test_add_features_feature_penalty(self):
X = np.random.random((1000, 2)) X = np.random.random((100, 2))
test_cases = [ test_cases = [
(None, None, None), (None, None, None),
([0.5], None, [0.5, 1]), ([0.5], None, [0.5, 1]),
...@@ -220,7 +215,7 @@ class TestBasic(unittest.TestCase): ...@@ -220,7 +215,7 @@ class TestBasic(unittest.TestCase):
np.testing.assert_allclose(actual, expected) np.testing.assert_allclose(actual, expected)
def test_add_features_monotone_types(self): def test_add_features_monotone_types(self):
X = np.random.random((1000, 2)) X = np.random.random((100, 2))
test_cases = [ test_cases = [
(None, None, None), (None, None, None),
([1], None, [1, 0]), ([1], None, [1, 0]),
...@@ -239,9 +234,9 @@ class TestBasic(unittest.TestCase): ...@@ -239,9 +234,9 @@ class TestBasic(unittest.TestCase):
np.testing.assert_array_equal(actual, expected) np.testing.assert_array_equal(actual, expected)
def test_cegb_affects_behavior(self): def test_cegb_affects_behavior(self):
X = np.random.random((1000, 5)) X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
y = np.random.random(1000) y = np.random.random(100)
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct() ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y) ds.set_label(y)
...@@ -269,9 +264,9 @@ class TestBasic(unittest.TestCase): ...@@ -269,9 +264,9 @@ class TestBasic(unittest.TestCase):
self.assertNotEqual(basetxt, casetxt) self.assertNotEqual(basetxt, casetxt)
def test_cegb_scaling_equalities(self): def test_cegb_scaling_equalities(self):
X = np.random.random((1000, 5)) X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
y = np.random.random(1000) y = np.random.random(100)
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct() ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y) ds.set_label(y)
...@@ -298,9 +293,9 @@ class TestBasic(unittest.TestCase): ...@@ -298,9 +293,9 @@ class TestBasic(unittest.TestCase):
with tempfile.NamedTemporaryFile() as f: with tempfile.NamedTemporaryFile() as f:
p2name = f.name p2name = f.name
booster2.save_model(p2name) booster2.save_model(p2name)
self.maxDiff = None
with open(p2name, 'rt') as f: with open(p2name, 'rt') as f:
p2txt = f.read() p2txt = f.read()
self.maxDiff = None
self.assertEqual(p1txt, p2txt) self.assertEqual(p1txt, p2txt)
def test_consistent_state_for_dataset_fields(self): def test_consistent_state_for_dataset_fields(self):
......
...@@ -21,6 +21,13 @@ except ImportError: ...@@ -21,6 +21,13 @@ except ImportError:
import pickle import pickle
decreasing_generator = itertools.count(0, -1)
def dummy_obj(preds, train_data):
return np.ones(preds.shape), np.ones(preds.shape)
def multi_logloss(y_true, y_pred): def multi_logloss(y_true, y_pred):
return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)]) return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
...@@ -32,6 +39,14 @@ def top_k_error(y_true, y_pred, k): ...@@ -32,6 +39,14 @@ def top_k_error(y_true, y_pred, k):
return 1 - np.mean((y_pred[np.arange(len(y_true)), y_true] > max_rest)) return 1 - np.mean((y_pred[np.arange(len(y_true)), y_true] > max_rest))
def constant_metric(preds, train_data):
return ('error', 0.0, False)
def decreasing_metric(preds, train_data):
return ('decreasing_metric', next(decreasing_generator), False)
class TestEngine(unittest.TestCase): class TestEngine(unittest.TestCase):
def test_binary(self): def test_binary(self):
X, y = load_breast_cancer(True) X, y = load_breast_cancer(True)
...@@ -51,7 +66,7 @@ class TestEngine(unittest.TestCase): ...@@ -51,7 +66,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = log_loss(y_test, gbm.predict(X_test)) ret = log_loss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.15) self.assertLess(ret, 0.11)
self.assertEqual(len(evals_result['valid_0']['binary_logloss']), 50) self.assertEqual(len(evals_result['valid_0']['binary_logloss']), 50)
self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
...@@ -77,7 +92,7 @@ class TestEngine(unittest.TestCase): ...@@ -77,7 +92,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = log_loss(y_test, gbm.predict(X_test)) ret = log_loss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.25) self.assertLess(ret, 0.19)
self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
def test_regression(self): def test_regression(self):
...@@ -96,13 +111,13 @@ class TestEngine(unittest.TestCase): ...@@ -96,13 +111,13 @@ class TestEngine(unittest.TestCase):
verbose_eval=False, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = mean_squared_error(y_test, gbm.predict(X_test)) ret = mean_squared_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 16) self.assertLess(ret, 7)
self.assertAlmostEqual(evals_result['valid_0']['l2'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['l2'][-1], ret, places=5)
def test_missing_value_handle(self): def test_missing_value_handle(self):
X_train = np.zeros((1000, 1)) X_train = np.zeros((100, 1))
y_train = np.zeros(1000) y_train = np.zeros(100)
trues = random.sample(range(1000), 200) trues = random.sample(range(100), 20)
for idx in trues: for idx in trues:
X_train[idx, 0] = np.nan X_train[idx, 0] = np.nan
y_train[idx] = 1 y_train[idx] = 1
...@@ -118,7 +133,7 @@ class TestEngine(unittest.TestCase): ...@@ -118,7 +133,7 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=20, num_boost_round=20,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = mean_squared_error(y_train, gbm.predict(X_train)) ret = mean_squared_error(y_train, gbm.predict(X_train))
self.assertLess(ret, 0.005) self.assertLess(ret, 0.005)
...@@ -148,7 +163,7 @@ class TestEngine(unittest.TestCase): ...@@ -148,7 +163,7 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=1, num_boost_round=1,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
pred = gbm.predict(X_train) pred = gbm.predict(X_train)
np.testing.assert_allclose(pred, y) np.testing.assert_allclose(pred, y)
...@@ -180,7 +195,7 @@ class TestEngine(unittest.TestCase): ...@@ -180,7 +195,7 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=1, num_boost_round=1,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
pred = gbm.predict(X_train) pred = gbm.predict(X_train)
np.testing.assert_allclose(pred, y) np.testing.assert_allclose(pred, y)
...@@ -212,11 +227,11 @@ class TestEngine(unittest.TestCase): ...@@ -212,11 +227,11 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=1, num_boost_round=1,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
pred = gbm.predict(X_train) pred = gbm.predict(X_train)
self.assertAlmostEqual(pred[0], pred[1], places=5) self.assertAlmostEqual(pred[0], pred[1])
self.assertAlmostEqual(pred[-1], pred[0], places=5) self.assertAlmostEqual(pred[-1], pred[0])
ret = roc_auc_score(y_train, pred) ret = roc_auc_score(y_train, pred)
self.assertGreater(ret, 0.83) self.assertGreater(ret, 0.83)
self.assertAlmostEqual(evals_result['valid_0']['auc'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['auc'][-1], ret, places=5)
...@@ -250,7 +265,7 @@ class TestEngine(unittest.TestCase): ...@@ -250,7 +265,7 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=1, num_boost_round=1,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
pred = gbm.predict(X_train) pred = gbm.predict(X_train)
np.testing.assert_allclose(pred, y) np.testing.assert_allclose(pred, y)
...@@ -287,7 +302,7 @@ class TestEngine(unittest.TestCase): ...@@ -287,7 +302,7 @@ class TestEngine(unittest.TestCase):
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=1, num_boost_round=1,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=True, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
pred = gbm.predict(X_train) pred = gbm.predict(X_train)
np.testing.assert_allclose(pred, y) np.testing.assert_allclose(pred, y)
...@@ -313,7 +328,7 @@ class TestEngine(unittest.TestCase): ...@@ -313,7 +328,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = multi_logloss(y_test, gbm.predict(X_test)) ret = multi_logloss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.2) self.assertLess(ret, 0.15)
self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
def test_multiclass_rf(self): def test_multiclass_rf(self):
...@@ -335,12 +350,12 @@ class TestEngine(unittest.TestCase): ...@@ -335,12 +350,12 @@ class TestEngine(unittest.TestCase):
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params) lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
evals_result = {} evals_result = {}
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=100, num_boost_round=50,
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=False, verbose_eval=False,
evals_result=evals_result) evals_result=evals_result)
ret = multi_logloss(y_test, gbm.predict(X_test)) ret = multi_logloss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.4) self.assertLess(ret, 0.23)
self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
def test_multiclass_prediction_early_stopping(self): def test_multiclass_prediction_early_stopping(self):
...@@ -361,7 +376,7 @@ class TestEngine(unittest.TestCase): ...@@ -361,7 +376,7 @@ class TestEngine(unittest.TestCase):
"pred_early_stop_margin": 1.5} "pred_early_stop_margin": 1.5}
ret = multi_logloss(y_test, gbm.predict(X_test, **pred_parameter)) ret = multi_logloss(y_test, gbm.predict(X_test, **pred_parameter))
self.assertLess(ret, 0.8) self.assertLess(ret, 0.8)
self.assertGreater(ret, 0.5) # loss will be higher than when evaluating the full model self.assertGreater(ret, 0.6) # loss will be higher than when evaluating the full model
pred_parameter = {"pred_early_stop": True, pred_parameter = {"pred_early_stop": True,
"pred_early_stop_freq": 5, "pred_early_stop_freq": 5,
...@@ -436,11 +451,12 @@ class TestEngine(unittest.TestCase): ...@@ -436,11 +451,12 @@ class TestEngine(unittest.TestCase):
self.assertIn('binary_logloss', gbm.best_score[valid_set_name]) self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
# early stopping occurs # early stopping occurs
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train,
num_boost_round=40,
valid_sets=lgb_eval, valid_sets=lgb_eval,
valid_names=valid_set_name, valid_names=valid_set_name,
verbose_eval=False, verbose_eval=False,
early_stopping_rounds=5) early_stopping_rounds=5)
self.assertLessEqual(gbm.best_iteration, 100) self.assertLessEqual(gbm.best_iteration, 31)
self.assertIn(valid_set_name, gbm.best_score) self.assertIn(valid_set_name, gbm.best_score)
self.assertIn('binary_logloss', gbm.best_score[valid_set_name]) self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
...@@ -463,14 +479,13 @@ class TestEngine(unittest.TestCase): ...@@ -463,14 +479,13 @@ class TestEngine(unittest.TestCase):
valid_sets=lgb_eval, valid_sets=lgb_eval,
verbose_eval=False, verbose_eval=False,
# test custom eval metrics # test custom eval metrics
feval=(lambda p, d: ('mae', mean_absolute_error(p, d.get_label()), False)), feval=(lambda p, d: ('custom_mae', mean_absolute_error(p, d.get_label()), False)),
evals_result=evals_result, evals_result=evals_result,
init_model='model.txt') init_model='model.txt')
ret = mean_absolute_error(y_test, gbm.predict(X_test)) ret = mean_absolute_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 3.5) self.assertLess(ret, 2.0)
self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5)
for l1, mae in zip(evals_result['valid_0']['l1'], evals_result['valid_0']['mae']): np.testing.assert_allclose(evals_result['valid_0']['l1'], evals_result['valid_0']['custom_mae'])
self.assertAlmostEqual(l1, mae, places=5)
os.remove(model_name) os.remove(model_name)
def test_continue_train_dart(self): def test_continue_train_dart(self):
...@@ -493,7 +508,7 @@ class TestEngine(unittest.TestCase): ...@@ -493,7 +508,7 @@ class TestEngine(unittest.TestCase):
evals_result=evals_result, evals_result=evals_result,
init_model=init_gbm) init_model=init_gbm)
ret = mean_absolute_error(y_test, gbm.predict(X_test)) ret = mean_absolute_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 2.5) self.assertLess(ret, 2.0)
self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5)
def test_continue_train_multiclass(self): def test_continue_train_multiclass(self):
...@@ -516,12 +531,11 @@ class TestEngine(unittest.TestCase): ...@@ -516,12 +531,11 @@ class TestEngine(unittest.TestCase):
evals_result=evals_result, evals_result=evals_result,
init_model=init_gbm) init_model=init_gbm)
ret = multi_logloss(y_test, gbm.predict(X_test)) ret = multi_logloss(y_test, gbm.predict(X_test))
self.assertLess(ret, 1.5) self.assertLess(ret, 0.1)
self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
def test_cv(self): def test_cv(self):
X, y = load_boston(True) X_train, y_train = load_boston(True)
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=42)
params = {'verbose': -1} params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
# shuffle = False, override metric in params # shuffle = False, override metric in params
...@@ -580,8 +594,7 @@ class TestEngine(unittest.TestCase): ...@@ -580,8 +594,7 @@ class TestEngine(unittest.TestCase):
np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean']) np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
def test_feature_name(self): def test_feature_name(self):
X, y = load_boston(True) X_train, y_train = load_boston(True)
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=42)
params = {'verbose': -1} params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
feature_names = ['f_' + str(i) for i in range(X_train.shape[-1])] feature_names = ['f_' + str(i) for i in range(X_train.shape[-1])]
...@@ -593,7 +606,7 @@ class TestEngine(unittest.TestCase): ...@@ -593,7 +606,7 @@ class TestEngine(unittest.TestCase):
self.assertListEqual(feature_names, gbm.feature_name()) self.assertListEqual(feature_names, gbm.feature_name())
def test_save_load_copy_pickle(self): def test_save_load_copy_pickle(self):
def test_template(init_model=None, return_model=False): def train_and_predict(init_model=None, return_model=False):
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = { params = {
...@@ -604,22 +617,23 @@ class TestEngine(unittest.TestCase): ...@@ -604,22 +617,23 @@ class TestEngine(unittest.TestCase):
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
gbm_template = lgb.train(params, lgb_train, num_boost_round=10, init_model=init_model) gbm_template = lgb.train(params, lgb_train, num_boost_round=10, init_model=init_model)
return gbm_template if return_model else mean_squared_error(y_test, gbm_template.predict(X_test)) return gbm_template if return_model else mean_squared_error(y_test, gbm_template.predict(X_test))
gbm = test_template(return_model=True)
ret_origin = test_template(init_model=gbm) gbm = train_and_predict(return_model=True)
ret_origin = train_and_predict(init_model=gbm)
other_ret = [] other_ret = []
gbm.save_model('lgb.model') gbm.save_model('lgb.model')
other_ret.append(test_template(init_model='lgb.model')) other_ret.append(train_and_predict(init_model='lgb.model'))
gbm_load = lgb.Booster(model_file='lgb.model') gbm_load = lgb.Booster(model_file='lgb.model')
other_ret.append(test_template(init_model=gbm_load)) other_ret.append(train_and_predict(init_model=gbm_load))
other_ret.append(test_template(init_model=copy.copy(gbm))) other_ret.append(train_and_predict(init_model=copy.copy(gbm)))
other_ret.append(test_template(init_model=copy.deepcopy(gbm))) other_ret.append(train_and_predict(init_model=copy.deepcopy(gbm)))
with open('lgb.pkl', 'wb') as f: with open('lgb.pkl', 'wb') as f:
pickle.dump(gbm, f) pickle.dump(gbm, f)
with open('lgb.pkl', 'rb') as f: with open('lgb.pkl', 'rb') as f:
gbm_pickle = pickle.load(f) gbm_pickle = pickle.load(f)
other_ret.append(test_template(init_model=gbm_pickle)) other_ret.append(train_and_predict(init_model=gbm_pickle))
gbm_pickles = pickle.loads(pickle.dumps(gbm)) gbm_pickles = pickle.loads(pickle.dumps(gbm))
other_ret.append(test_template(init_model=gbm_pickles)) other_ret.append(train_and_predict(init_model=gbm_pickles))
for ret in other_ret: for ret in other_ret:
self.assertAlmostEqual(ret_origin, ret, places=5) self.assertAlmostEqual(ret_origin, ret, places=5)
...@@ -670,16 +684,15 @@ class TestEngine(unittest.TestCase): ...@@ -670,16 +684,15 @@ class TestEngine(unittest.TestCase):
gbm3.save_model('categorical.model') gbm3.save_model('categorical.model')
gbm4 = lgb.Booster(model_file='categorical.model') gbm4 = lgb.Booster(model_file='categorical.model')
pred4 = gbm4.predict(X_test) pred4 = gbm4.predict(X_test)
self.assertListEqual(lgb_train.categorical_feature, ['A', 'B', 'C', 'D'])
model_str = gbm4.model_to_string() model_str = gbm4.model_to_string()
gbm4.model_from_string(model_str, False) gbm4.model_from_string(model_str, False)
pred5 = gbm4.predict(X_test) pred5 = gbm4.predict(X_test)
gbm5 = lgb.Booster(model_str=model_str) gbm5 = lgb.Booster(model_str=model_str)
pred6 = gbm5.predict(X_test) pred6 = gbm5.predict(X_test)
lgb_train = lgb.Dataset(X, y) lgb_train = lgb.Dataset(X, y)
gbm6 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=['E']) gbm6 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=['A', 'B', 'C', 'D', 'E'])
pred7 = gbm6.predict(X_test) pred7 = gbm6.predict(X_test)
self.assertListEqual(lgb_train.categorical_feature, ['E']) self.assertListEqual(lgb_train.categorical_feature, ['A', 'B', 'C', 'D', 'E'])
lgb_train = lgb.Dataset(X, y) lgb_train = lgb.Dataset(X, y)
gbm7 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[]) gbm7 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=[])
pred8 = gbm7.predict(X_test) pred8 = gbm7.predict(X_test)
...@@ -746,7 +759,8 @@ class TestEngine(unittest.TestCase): ...@@ -746,7 +759,8 @@ class TestEngine(unittest.TestCase):
params = {'objective': 'regression_l2', 'metric': 'rmse'} params = {'objective': 'regression_l2', 'metric': 'rmse'}
evals_result = {} evals_result = {}
gbm = lgb.train(params, tmp_dat_train, num_boost_round=20, gbm = lgb.train(params, tmp_dat_train, num_boost_round=20,
valid_sets=[tmp_dat_train, tmp_dat_val], evals_result=evals_result) valid_sets=[tmp_dat_train, tmp_dat_val],
verbose_eval=False, evals_result=evals_result)
self.assertEqual(len(evals_result['training']['rmse']), 20) self.assertEqual(len(evals_result['training']['rmse']), 20)
self.assertEqual(len(evals_result['valid_1']['rmse']), 20) self.assertEqual(len(evals_result['valid_1']['rmse']), 20)
...@@ -759,8 +773,7 @@ class TestEngine(unittest.TestCase): ...@@ -759,8 +773,7 @@ class TestEngine(unittest.TestCase):
'verbose': -1, 'verbose': -1,
} }
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
gbm = lgb.train(params, lgb_train, gbm = lgb.train(params, lgb_train, num_boost_round=20)
num_boost_round=20)
self.assertLess(np.linalg.norm(gbm.predict(X_test, raw_score=True) self.assertLess(np.linalg.norm(gbm.predict(X_test, raw_score=True)
- np.sum(gbm.predict(X_test, pred_contrib=True), axis=1)), 1e-4) - np.sum(gbm.predict(X_test, pred_contrib=True), axis=1)), 1e-4)
...@@ -783,10 +796,8 @@ class TestEngine(unittest.TestCase): ...@@ -783,10 +796,8 @@ class TestEngine(unittest.TestCase):
num_samples = 100 num_samples = 100
features = np.random.rand(num_samples, 5) features = np.random.rand(num_samples, 5)
positive_samples = int(num_samples * 0.25) positive_samples = int(num_samples * 0.25)
labels = np.append( labels = np.append(np.ones(positive_samples, dtype=np.float32),
np.ones(positive_samples, dtype=np.float32), np.zeros(num_samples - positive_samples, dtype=np.float32))
np.zeros(num_samples - positive_samples, dtype=np.float32),
)
# test sliced labels # test sliced labels
origin_pred = train_and_get_predictions(features, labels) origin_pred = train_and_get_predictions(features, labels)
stacked_labels = np.column_stack((labels, np.ones(num_samples, dtype=np.float32))) stacked_labels = np.column_stack((labels, np.ones(num_samples, dtype=np.float32)))
...@@ -816,12 +827,12 @@ class TestEngine(unittest.TestCase): ...@@ -816,12 +827,12 @@ class TestEngine(unittest.TestCase):
np.testing.assert_allclose(origin_pred, sliced_pred) np.testing.assert_allclose(origin_pred, sliced_pred)
def test_init_with_subset(self): def test_init_with_subset(self):
data = np.random.random((500, 2)) data = np.random.random((50, 2))
y = [1] * 250 + [0] * 250 y = [1] * 25 + [0] * 25
lgb_train = lgb.Dataset(data, y, free_raw_data=False) lgb_train = lgb.Dataset(data, y, free_raw_data=False)
subset_index_1 = sorted(np.random.choice(np.arange(500), 300, replace=False)) subset_index_1 = np.random.choice(np.arange(50), 30, replace=False)
subset_data_1 = lgb_train.subset(subset_index_1) subset_data_1 = lgb_train.subset(subset_index_1)
subset_index_2 = sorted(np.random.choice(np.arange(500), 200, replace=False)) subset_index_2 = np.random.choice(np.arange(50), 20, replace=False)
subset_data_2 = lgb_train.subset(subset_index_2) subset_data_2 = lgb_train.subset(subset_index_2)
params = { params = {
'objective': 'binary', 'objective': 'binary',
...@@ -835,9 +846,9 @@ class TestEngine(unittest.TestCase): ...@@ -835,9 +846,9 @@ class TestEngine(unittest.TestCase):
train_set=subset_data_2, train_set=subset_data_2,
num_boost_round=10, num_boost_round=10,
init_model=init_gbm) init_model=init_gbm)
self.assertEqual(lgb_train.get_data().shape[0], 500) self.assertEqual(lgb_train.get_data().shape[0], 50)
self.assertEqual(subset_data_1.get_data().shape[0], 300) self.assertEqual(subset_data_1.get_data().shape[0], 30)
self.assertEqual(subset_data_2.get_data().shape[0], 200) self.assertEqual(subset_data_2.get_data().shape[0], 20)
lgb_train.save_binary("lgb_train_data.bin") lgb_train.save_binary("lgb_train_data.bin")
lgb_train_from_file = lgb.Dataset('lgb_train_data.bin', free_raw_data=False) lgb_train_from_file = lgb.Dataset('lgb_train_data.bin', free_raw_data=False)
subset_data_3 = lgb_train_from_file.subset(subset_index_1) subset_data_3 = lgb_train_from_file.subset(subset_index_1)
...@@ -876,7 +887,7 @@ class TestEngine(unittest.TestCase): ...@@ -876,7 +887,7 @@ class TestEngine(unittest.TestCase):
return False return False
return True return True
number_of_dpoints = 3000 number_of_dpoints = 2000
x1_positively_correlated_with_y = np.random.random(size=number_of_dpoints) x1_positively_correlated_with_y = np.random.random(size=number_of_dpoints)
x2_negatively_correlated_with_y = np.random.random(size=number_of_dpoints) x2_negatively_correlated_with_y = np.random.random(size=number_of_dpoints)
x = np.column_stack((x1_positively_correlated_with_y, x2_negatively_correlated_with_y)) x = np.column_stack((x1_positively_correlated_with_y, x2_negatively_correlated_with_y))
...@@ -931,11 +942,11 @@ class TestEngine(unittest.TestCase): ...@@ -931,11 +942,11 @@ class TestEngine(unittest.TestCase):
'verbose': -1, 'verbose': -1,
'max_bin': 2} 'max_bin': 2}
lgb_x = lgb.Dataset(x, label=y) lgb_x = lgb.Dataset(x, label=y)
est = lgb.train(params, lgb_x, num_boost_round=5) lgb.train(params, lgb_x, num_boost_round=5)
x[0, 0] = np.nan x[0, 0] = np.nan
params['max_bin'] = 3 params['max_bin'] = 3
lgb_x = lgb.Dataset(x, label=y) lgb_x = lgb.Dataset(x, label=y)
est = lgb.train(params, lgb_x, num_boost_round=5) lgb.train(params, lgb_x, num_boost_round=5)
np.random.seed() # reset seed np.random.seed() # reset seed
def test_refit(self): def test_refit(self):
...@@ -1059,12 +1070,6 @@ class TestEngine(unittest.TestCase): ...@@ -1059,12 +1070,6 @@ class TestEngine(unittest.TestCase):
self.assertEqual(len(results['multi_logloss-mean']), 10) self.assertEqual(len(results['multi_logloss-mean']), 10)
def test_metrics(self): def test_metrics(self):
def custom_obj(preds, train_data):
return np.zeros(preds.shape), np.zeros(preds.shape)
def custom_metric(preds, train_data):
return 'error', 0, False
X, y = load_digits(2, True) X, y = load_digits(2, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
lgb_train = lgb.Dataset(X_train, y_train, silent=True) lgb_train = lgb.Dataset(X_train, y_train, silent=True)
...@@ -1087,11 +1092,11 @@ class TestEngine(unittest.TestCase): ...@@ -1087,11 +1092,11 @@ class TestEngine(unittest.TestCase):
params_metric_none_verbose = {'metric': 'None', 'verbose': -1} params_metric_none_verbose = {'metric': 'None', 'verbose': -1}
def get_cv_result(params=params_obj_verbose, **kwargs): def get_cv_result(params=params_obj_verbose, **kwargs):
return lgb.cv(params, lgb_train, num_boost_round=5, verbose_eval=False, **kwargs) return lgb.cv(params, lgb_train, num_boost_round=2, verbose_eval=False, **kwargs)
def train_booster(params=params_obj_verbose, **kwargs): def train_booster(params=params_obj_verbose, **kwargs):
lgb.train(params, lgb_train, lgb.train(params, lgb_train,
num_boost_round=5, num_boost_round=2,
valid_sets=[lgb_valid], valid_sets=[lgb_valid],
evals_result=evals_result, evals_result=evals_result,
verbose_eval=False, **kwargs) verbose_eval=False, **kwargs)
...@@ -1145,32 +1150,32 @@ class TestEngine(unittest.TestCase): ...@@ -1145,32 +1150,32 @@ class TestEngine(unittest.TestCase):
# fobj, no feval # fobj, no feval
# no default metric # no default metric
res = get_cv_result(params=params_verbose, fobj=custom_obj) res = get_cv_result(params=params_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 0) self.assertEqual(len(res), 0)
# metric in params # metric in params
res = get_cv_result(params=params_metric_err_verbose, fobj=custom_obj) res = get_cv_result(params=params_metric_err_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
# metric in args # metric in args
res = get_cv_result(params=params_verbose, fobj=custom_obj, metrics='binary_error') res = get_cv_result(params=params_verbose, fobj=dummy_obj, metrics='binary_error')
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
# metric in args overwrites its alias in params # metric in args overwrites its alias in params
res = get_cv_result(params=params_metric_inv_verbose, fobj=custom_obj, metrics='binary_error') res = get_cv_result(params=params_metric_inv_verbose, fobj=dummy_obj, metrics='binary_error')
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
# multiple metrics in params # multiple metrics in params
res = get_cv_result(params=params_metric_multi_verbose, fobj=custom_obj) res = get_cv_result(params=params_metric_multi_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
# multiple metrics in args # multiple metrics in args
res = get_cv_result(params=params_verbose, fobj=custom_obj, res = get_cv_result(params=params_verbose, fobj=dummy_obj,
metrics=['binary_logloss', 'binary_error']) metrics=['binary_logloss', 'binary_error'])
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
...@@ -1178,89 +1183,89 @@ class TestEngine(unittest.TestCase): ...@@ -1178,89 +1183,89 @@ class TestEngine(unittest.TestCase):
# no fobj, feval # no fobj, feval
# default metric with custom one # default metric with custom one
res = get_cv_result(feval=custom_metric) res = get_cv_result(feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# non-default metric in params with custom one # non-default metric in params with custom one
res = get_cv_result(params=params_obj_metric_err_verbose, feval=custom_metric) res = get_cv_result(params=params_obj_metric_err_verbose, feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# default metric in args with custom one # default metric in args with custom one
res = get_cv_result(metrics='binary_logloss', feval=custom_metric) res = get_cv_result(metrics='binary_logloss', feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# non-default metric in args with custom one # non-default metric in args with custom one
res = get_cv_result(metrics='binary_error', feval=custom_metric) res = get_cv_result(metrics='binary_error', feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# metric in args overwrites one in params, custom one is evaluated too # metric in args overwrites one in params, custom one is evaluated too
res = get_cv_result(params=params_obj_metric_inv_verbose, metrics='binary_error', feval=custom_metric) res = get_cv_result(params=params_obj_metric_inv_verbose, metrics='binary_error', feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiple metrics in params with custom one # multiple metrics in params with custom one
res = get_cv_result(params=params_obj_metric_multi_verbose, feval=custom_metric) res = get_cv_result(params=params_obj_metric_multi_verbose, feval=constant_metric)
self.assertEqual(len(res), 6) self.assertEqual(len(res), 6)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiple metrics in args with custom one # multiple metrics in args with custom one
res = get_cv_result(metrics=['binary_logloss', 'binary_error'], feval=custom_metric) res = get_cv_result(metrics=['binary_logloss', 'binary_error'], feval=constant_metric)
self.assertEqual(len(res), 6) self.assertEqual(len(res), 6)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# custom metric is evaluated even though 'None' is passed # custom metric is evaluated even though 'None' is passed
res = get_cv_result(metrics=['None'], feval=custom_metric) res = get_cv_result(metrics=['None'], feval=constant_metric)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# fobj, feval # fobj, feval
# no default metric, only custom one # no default metric, only custom one
res = get_cv_result(params=params_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params=params_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# metric in params with custom one # metric in params with custom one
res = get_cv_result(params=params_metric_err_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params=params_metric_err_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# metric in args with custom one # metric in args with custom one
res = get_cv_result(params=params_verbose, fobj=custom_obj, res = get_cv_result(params=params_verbose, fobj=dummy_obj,
feval=custom_metric, metrics='binary_error') feval=constant_metric, metrics='binary_error')
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# metric in args overwrites one in params, custom one is evaluated too # metric in args overwrites one in params, custom one is evaluated too
res = get_cv_result(params=params_metric_inv_verbose, fobj=custom_obj, res = get_cv_result(params=params_metric_inv_verbose, fobj=dummy_obj,
feval=custom_metric, metrics='binary_error') feval=constant_metric, metrics='binary_error')
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiple metrics in params with custom one # multiple metrics in params with custom one
res = get_cv_result(params=params_metric_multi_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params=params_metric_multi_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 6) self.assertEqual(len(res), 6)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
self.assertIn('binary_error-mean', res) self.assertIn('binary_error-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiple metrics in args with custom one # multiple metrics in args with custom one
res = get_cv_result(params=params_verbose, fobj=custom_obj, feval=custom_metric, res = get_cv_result(params=params_verbose, fobj=dummy_obj, feval=constant_metric,
metrics=['binary_logloss', 'binary_error']) metrics=['binary_logloss', 'binary_error'])
self.assertEqual(len(res), 6) self.assertEqual(len(res), 6)
self.assertIn('binary_logloss-mean', res) self.assertIn('binary_logloss-mean', res)
...@@ -1268,7 +1273,7 @@ class TestEngine(unittest.TestCase): ...@@ -1268,7 +1273,7 @@ class TestEngine(unittest.TestCase):
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# custom metric is evaluated even though 'None' is passed # custom metric is evaluated even though 'None' is passed
res = get_cv_result(params=params_metric_none_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params=params_metric_none_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
...@@ -1302,72 +1307,72 @@ class TestEngine(unittest.TestCase): ...@@ -1302,72 +1307,72 @@ class TestEngine(unittest.TestCase):
# fobj, no feval # fobj, no feval
# no default metric # no default metric
train_booster(params=params_verbose, fobj=custom_obj) train_booster(params=params_verbose, fobj=dummy_obj)
self.assertEqual(len(evals_result), 0) self.assertEqual(len(evals_result), 0)
# metric in params # metric in params
train_booster(params=params_metric_log_verbose, fobj=custom_obj) train_booster(params=params_metric_log_verbose, fobj=dummy_obj)
self.assertEqual(len(evals_result['valid_0']), 1) self.assertEqual(len(evals_result['valid_0']), 1)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
# multiple metrics in params # multiple metrics in params
train_booster(params=params_metric_multi_verbose, fobj=custom_obj) train_booster(params=params_metric_multi_verbose, fobj=dummy_obj)
self.assertEqual(len(evals_result['valid_0']), 2) self.assertEqual(len(evals_result['valid_0']), 2)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('binary_error', evals_result['valid_0']) self.assertIn('binary_error', evals_result['valid_0'])
# no fobj, feval # no fobj, feval
# default metric with custom one # default metric with custom one
train_booster(feval=custom_metric) train_booster(feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 2) self.assertEqual(len(evals_result['valid_0']), 2)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# default metric in params with custom one # default metric in params with custom one
train_booster(params=params_obj_metric_log_verbose, feval=custom_metric) train_booster(params=params_obj_metric_log_verbose, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 2) self.assertEqual(len(evals_result['valid_0']), 2)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# non-default metric in params with custom one # non-default metric in params with custom one
train_booster(params=params_obj_metric_err_verbose, feval=custom_metric) train_booster(params=params_obj_metric_err_verbose, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 2) self.assertEqual(len(evals_result['valid_0']), 2)
self.assertIn('binary_error', evals_result['valid_0']) self.assertIn('binary_error', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# multiple metrics in params with custom one # multiple metrics in params with custom one
train_booster(params=params_obj_metric_multi_verbose, feval=custom_metric) train_booster(params=params_obj_metric_multi_verbose, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 3) self.assertEqual(len(evals_result['valid_0']), 3)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('binary_error', evals_result['valid_0']) self.assertIn('binary_error', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# custom metric is evaluated even though 'None' is passed # custom metric is evaluated even though 'None' is passed
train_booster(params=params_obj_metric_none_verbose, feval=custom_metric) train_booster(params=params_obj_metric_none_verbose, feval=constant_metric)
self.assertEqual(len(evals_result), 1) self.assertEqual(len(evals_result), 1)
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# fobj, feval # fobj, feval
# no default metric, only custom one # no default metric, only custom one
train_booster(params=params_verbose, fobj=custom_obj, feval=custom_metric) train_booster(params=params_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 1) self.assertEqual(len(evals_result['valid_0']), 1)
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# metric in params with custom one # metric in params with custom one
train_booster(params=params_metric_log_verbose, fobj=custom_obj, feval=custom_metric) train_booster(params=params_metric_log_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 2) self.assertEqual(len(evals_result['valid_0']), 2)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# multiple metrics in params with custom one # multiple metrics in params with custom one
train_booster(params=params_metric_multi_verbose, fobj=custom_obj, feval=custom_metric) train_booster(params=params_metric_multi_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(evals_result['valid_0']), 3) self.assertEqual(len(evals_result['valid_0']), 3)
self.assertIn('binary_logloss', evals_result['valid_0']) self.assertIn('binary_logloss', evals_result['valid_0'])
self.assertIn('binary_error', evals_result['valid_0']) self.assertIn('binary_error', evals_result['valid_0'])
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
# custom metric is evaluated even though 'None' is passed # custom metric is evaluated even though 'None' is passed
train_booster(params=params_metric_none_verbose, fobj=custom_obj, feval=custom_metric) train_booster(params=params_metric_none_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(evals_result), 1) self.assertEqual(len(evals_result), 1)
self.assertIn('error', evals_result['valid_0']) self.assertIn('error', evals_result['valid_0'])
...@@ -1384,25 +1389,25 @@ class TestEngine(unittest.TestCase): ...@@ -1384,25 +1389,25 @@ class TestEngine(unittest.TestCase):
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res) self.assertIn('multi_logloss-mean', res)
# multiclass default metric with custom one # multiclass default metric with custom one
res = get_cv_result(params_obj_class_3_verbose, feval=custom_metric) res = get_cv_result(params_obj_class_3_verbose, feval=constant_metric)
self.assertEqual(len(res), 4) self.assertEqual(len(res), 4)
self.assertIn('multi_logloss-mean', res) self.assertIn('multi_logloss-mean', res)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiclass metric alias with custom one for custom objective # multiclass metric alias with custom one for custom objective
res = get_cv_result(params_obj_class_3_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params_obj_class_3_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# no metric for invalid class_num # no metric for invalid class_num
res = get_cv_result(params_obj_class_1_verbose, fobj=custom_obj) res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 0) self.assertEqual(len(res), 0)
# custom metric for invalid class_num # custom metric for invalid class_num
res = get_cv_result(params_obj_class_1_verbose, fobj=custom_obj, feval=custom_metric) res = get_cv_result(params_obj_class_1_verbose, fobj=dummy_obj, feval=constant_metric)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('error-mean', res) self.assertIn('error-mean', res)
# multiclass metric alias with custom one with invalid class_num # multiclass metric alias with custom one with invalid class_num
self.assertRaises(lgb.basic.LightGBMError, get_cv_result, self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_obj_class_1_verbose, metrics=obj_multi_alias, params_obj_class_1_verbose, metrics=obj_multi_alias,
fobj=custom_obj, feval=custom_metric) fobj=dummy_obj, feval=constant_metric)
# multiclass default metric without num_class # multiclass default metric without num_class
self.assertRaises(lgb.basic.LightGBMError, get_cv_result, self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_obj_verbose) params_obj_verbose)
...@@ -1423,20 +1428,20 @@ class TestEngine(unittest.TestCase): ...@@ -1423,20 +1428,20 @@ class TestEngine(unittest.TestCase):
self.assertRaises(lgb.basic.LightGBMError, get_cv_result, self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_class_3_verbose) params_class_3_verbose)
# no metric with non-default num_class for custom objective # no metric with non-default num_class for custom objective
res = get_cv_result(params_class_3_verbose, fobj=custom_obj) res = get_cv_result(params_class_3_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 0) self.assertEqual(len(res), 0)
for metric_multi_alias in obj_multi_aliases + ['multi_logloss']: for metric_multi_alias in obj_multi_aliases + ['multi_logloss']:
# multiclass metric alias for custom objective # multiclass metric alias for custom objective
res = get_cv_result(params_class_3_verbose, metrics=metric_multi_alias, fobj=custom_obj) res = get_cv_result(params_class_3_verbose, metrics=metric_multi_alias, fobj=dummy_obj)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res) self.assertIn('multi_logloss-mean', res)
# multiclass metric for custom objective # multiclass metric for custom objective
res = get_cv_result(params_class_3_verbose, metrics='multi_error', fobj=custom_obj) res = get_cv_result(params_class_3_verbose, metrics='multi_error', fobj=dummy_obj)
self.assertEqual(len(res), 2) self.assertEqual(len(res), 2)
self.assertIn('multi_error-mean', res) self.assertIn('multi_error-mean', res)
# binary metric with non-default num_class for custom objective # binary metric with non-default num_class for custom objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result, self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_class_3_verbose, metrics='binary_error', fobj=custom_obj) params_class_3_verbose, metrics='binary_error', fobj=dummy_obj)
@unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM') @unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM')
def test_model_size(self): def test_model_size(self):
...@@ -1543,12 +1548,6 @@ class TestEngine(unittest.TestCase): ...@@ -1543,12 +1548,6 @@ class TestEngine(unittest.TestCase):
def test_early_stopping_for_only_first_metric(self): def test_early_stopping_for_only_first_metric(self):
def decreasing_metric(preds, train_data):
return ('decreasing_metric', next(decreasing_generator), False)
def constant_metric(preds, train_data):
return ('constant_metric', 0.0, False)
def metrics_combination_train_regression(valid_sets, metric_list, assumed_iteration, def metrics_combination_train_regression(valid_sets, metric_list, assumed_iteration,
first_metric_only, feval=None): first_metric_only, feval=None):
params = { params = {
...@@ -1582,7 +1581,6 @@ class TestEngine(unittest.TestCase): ...@@ -1582,7 +1581,6 @@ class TestEngine(unittest.TestCase):
eval_train_metric=eval_train_metric) eval_train_metric=eval_train_metric)
self.assertEqual(assumed_iteration, len(ret[list(ret.keys())[0]])) self.assertEqual(assumed_iteration, len(ret[list(ret.keys())[0]]))
decreasing_generator = itertools.count(0, -1)
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73) X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73)
...@@ -1683,8 +1681,7 @@ class TestEngine(unittest.TestCase): ...@@ -1683,8 +1681,7 @@ class TestEngine(unittest.TestCase):
self.assertLess(ret, 0.13) self.assertLess(ret, 0.13)
self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5) self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
params['feature_fraction'] = 0.5 params['feature_fraction'] = 0.5
gbm2 = lgb.train(params, lgb_train, gbm2 = lgb.train(params, lgb_train, num_boost_round=25)
num_boost_round=25)
ret2 = log_loss(y_test, gbm2.predict(X_test)) ret2 = log_loss(y_test, gbm2.predict(X_test))
self.assertNotEqual(ret, ret2) self.assertNotEqual(ret, ret2)
...@@ -1700,10 +1697,9 @@ class TestEngine(unittest.TestCase): ...@@ -1700,10 +1697,9 @@ class TestEngine(unittest.TestCase):
'forcedbins_filename': forcedbins_filename, 'forcedbins_filename': forcedbins_filename,
'num_leaves': 2, 'num_leaves': 2,
'min_data_in_leaf': 1, 'min_data_in_leaf': 1,
'verbose': -1, 'verbose': -1}
'seed': 0}
lgb_x = lgb.Dataset(x, label=y) lgb_x = lgb.Dataset(x, label=y)
est = lgb.train(params, lgb_x, num_boost_round=100) est = lgb.train(params, lgb_x, num_boost_round=20)
new_x = np.zeros((3, x.shape[1])) new_x = np.zeros((3, x.shape[1]))
new_x[:, 0] = [0.31, 0.37, 0.41] new_x[:, 0] = [0.31, 0.37, 0.41]
new_x[:, 1] = [0, 0, 0] new_x[:, 1] = [0, 0, 0]
...@@ -1715,16 +1711,16 @@ class TestEngine(unittest.TestCase): ...@@ -1715,16 +1711,16 @@ class TestEngine(unittest.TestCase):
self.assertEqual(len(np.unique(predicted)), 1) self.assertEqual(len(np.unique(predicted)), 1)
params['forcedbins_filename'] = '' params['forcedbins_filename'] = ''
lgb_x = lgb.Dataset(x, label=y) lgb_x = lgb.Dataset(x, label=y)
est = lgb.train(params, lgb_x, num_boost_round=100) est = lgb.train(params, lgb_x, num_boost_round=20)
predicted = est.predict(new_x) predicted = est.predict(new_x)
self.assertEqual(len(np.unique(predicted)), 3) self.assertEqual(len(np.unique(predicted)), 3)
params['forcedbins_filename'] = os.path.join(os.path.dirname(os.path.realpath(__file__)), params['forcedbins_filename'] = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/regression/forced_bins2.json') '../../examples/regression/forced_bins2.json')
params['max_bin'] = 11 params['max_bin'] = 11
lgb_x = lgb.Dataset(x[:, :1], label=y) lgb_x = lgb.Dataset(x[:, :1], label=y)
est = lgb.train(params, lgb_x, num_boost_round=100) est = lgb.train(params, lgb_x, num_boost_round=50)
predicted = est.predict(x[1:, :1]) predicted = est.predict(x[1:, :1])
vals, counts = np.unique(predicted, return_counts=True) _, counts = np.unique(predicted, return_counts=True)
self.assertGreaterEqual(min(counts), 9) self.assertGreaterEqual(min(counts), 9)
self.assertLessEqual(max(counts), 11) self.assertLessEqual(max(counts), 11)
...@@ -1741,7 +1737,7 @@ class TestEngine(unittest.TestCase): ...@@ -1741,7 +1737,7 @@ class TestEngine(unittest.TestCase):
'verbose': -1, 'verbose': -1,
'seed': 0} 'seed': 0}
lgb_x = lgb.Dataset(x, label=y) lgb_x = lgb.Dataset(x, label=y)
est = lgb.train(params, lgb_x, num_boost_round=100) est = lgb.train(params, lgb_x, num_boost_round=20)
new_x = np.zeros((3, 2)) new_x = np.zeros((3, 2))
new_x[:, 0] = [-1, 0, 1] new_x[:, 0] = [-1, 0, 1]
predicted = est.predict(new_x) predicted = est.predict(new_x)
......
...@@ -18,12 +18,7 @@ from sklearn.utils.estimator_checks import (_yield_all_checks, SkipTest, ...@@ -18,12 +18,7 @@ from sklearn.utils.estimator_checks import (_yield_all_checks, SkipTest,
check_parameters_default_constructible) check_parameters_default_constructible)
def multi_error(y_true, y_pred): decreasing_generator = itertools.count(0, -1)
return np.mean(y_true != y_pred)
def multi_logloss(y_true, y_pred):
return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
def custom_asymmetric_obj(y_true, y_pred): def custom_asymmetric_obj(y_true, y_pred):
...@@ -33,10 +28,47 @@ def custom_asymmetric_obj(y_true, y_pred): ...@@ -33,10 +28,47 @@ def custom_asymmetric_obj(y_true, y_pred):
return grad, hess return grad, hess
def objective_ls(y_true, y_pred):
    """Custom least-squares objective.

    Returns the pair ``(grad, hess)`` where ``grad`` is the residual
    ``y_pred - y_true`` and ``hess`` is a vector of ones, i.e. the first and
    second derivatives of ``0.5 * (y_pred - y_true) ** 2`` w.r.t. ``y_pred``.

    Both arguments may be any 1-D array-like; they are coerced with
    ``np.asarray`` so plain lists work as well as NumPy arrays.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    grad = y_pred - y_true
    hess = np.ones(len(y_true))
    return grad, hess
def logregobj(y_true, y_pred):
    """Custom logistic-regression objective.

    ``y_pred`` is treated as a raw score and passed through the logistic
    sigmoid to obtain a probability ``p``.  Returns ``(grad, hess)`` with
    ``grad = p - y_true`` and ``hess = p * (1 - p)``.

    Both arguments may be any array-like; they are coerced with
    ``np.asarray`` so plain lists work as well as NumPy arrays.
    """
    y_true = np.asarray(y_true)
    # Raw score -> probability via the logistic sigmoid.
    prob = 1.0 / (1.0 + np.exp(-np.asarray(y_pred)))
    grad = prob - y_true
    hess = prob * (1.0 - prob)
    return grad, hess
def custom_dummy_obj(y_true, y_pred):
    """Dummy objective that ignores the predictions.

    Returns ``(grad, hess)`` as two separate arrays of ones, each with the
    same shape as ``y_true``.  ``y_pred`` is accepted only to satisfy the
    objective signature and is never read.
    """
    # np.shape also handles plain sequences, unlike the ``.shape`` attribute.
    shape = np.shape(y_true)
    return np.ones(shape), np.ones(shape)
def constant_metric(y_true, y_pred):
    """Custom evaluation metric that never changes.

    Ignores both arguments and always returns the tuple
    ``('error', 0, False)``.
    NOTE(review): the tuple presumably follows LightGBM's
    ``(eval_name, eval_result, is_higher_better)`` convention - confirm
    against the library documentation.
    """
    return 'error', 0, False
def decreasing_metric(y_true, y_pred):
    """Custom evaluation metric whose value changes on every call.

    Ignores both arguments.  Each call pulls the next value from the
    module-level ``decreasing_generator`` iterator and returns
    ``('decreasing_metric', value, False)``.  The iterator is shared state:
    every call, from any test, advances it.
    """
    return ('decreasing_metric', next(decreasing_generator), False)
def mse(y_true, y_pred):
    """Custom evaluation metric wrapping ``mean_squared_error``.

    Returns the tuple ``('custom MSE', mean_squared_error(y_true, y_pred),
    False)``.
    NOTE(review): the trailing ``False`` presumably marks the metric as
    lower-is-better per LightGBM's custom-metric convention - confirm.
    """
    return 'custom MSE', mean_squared_error(y_true, y_pred), False
def binary_error(y_true, y_pred):
    """Fraction of samples whose thresholded prediction differs from the label.

    ``y_pred`` is thresholded with a strict ``> 0.5`` (so exactly 0.5 counts
    as the negative class) and compared element-wise with ``y_true``; the
    mean of the mismatches is returned.

    Both arguments may be any array-like; they are coerced with
    ``np.asarray`` so plain lists work as well as NumPy arrays.
    """
    predicted_positive = np.asarray(y_pred) > 0.5
    return np.mean(predicted_positive != np.asarray(y_true))
def multi_error(y_true, y_pred):
    """Fraction of positions where ``y_true`` and ``y_pred`` differ.

    Both arguments are coerced with ``np.asarray`` before the comparison so
    that the ``!=`` is always element-wise.  Without the coercion two plain
    lists would be compared as whole objects, yielding only 0.0 or 1.0.
    """
    return np.mean(np.asarray(y_true) != np.asarray(y_pred))
def multi_logloss(y_true, y_pred):
    """Mean negative log of the value ``y_pred`` holds for each true class.

    For every sample ``i`` with label ``y``, takes ``-log(y_pred[i][y])``
    and returns the mean over all samples.  ``y_pred`` is indexed as
    ``y_pred[i][class]``.

    Labels are cast to ``int`` before indexing so float-valued labels
    (e.g. ``1.0``) can be used as class indices too.
    """
    per_sample_loss = [-math.log(y_pred[i][int(label)])
                       for i, label in enumerate(y_true)]
    return np.mean(per_sample_loss)
class TestSklearn(unittest.TestCase): class TestSklearn(unittest.TestCase):
def test_binary(self): def test_binary(self):
...@@ -45,7 +77,7 @@ class TestSklearn(unittest.TestCase): ...@@ -45,7 +77,7 @@ class TestSklearn(unittest.TestCase):
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = log_loss(y_test, gbm.predict_proba(X_test)) ret = log_loss(y_test, gbm.predict_proba(X_test))
self.assertLess(ret, 0.15) self.assertLess(ret, 0.11)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
def test_regression(self): def test_regression(self):
...@@ -54,7 +86,7 @@ class TestSklearn(unittest.TestCase): ...@@ -54,7 +86,7 @@ class TestSklearn(unittest.TestCase):
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True) gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = mean_squared_error(y_test, gbm.predict(X_test)) ret = mean_squared_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 16) self.assertLess(ret, 7)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
def test_multiclass(self): def test_multiclass(self):
...@@ -63,8 +95,9 @@ class TestSklearn(unittest.TestCase): ...@@ -63,8 +95,9 @@ class TestSklearn(unittest.TestCase):
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = multi_error(y_test, gbm.predict(X_test)) ret = multi_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.2) self.assertLess(ret, 0.05)
ret = multi_logloss(y_test, gbm.predict_proba(X_test)) ret = multi_logloss(y_test, gbm.predict_proba(X_test))
self.assertLess(ret, 0.15)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1], places=5)
def test_lambdarank(self): def test_lambdarank(self):
...@@ -76,76 +109,64 @@ class TestSklearn(unittest.TestCase): ...@@ -76,76 +109,64 @@ class TestSklearn(unittest.TestCase):
'../../examples/lambdarank/rank.train.query')) '../../examples/lambdarank/rank.train.query'))
q_test = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), q_test = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/lambdarank/rank.test.query')) '../../examples/lambdarank/rank.test.query'))
gbm = lgb.LGBMRanker() gbm = lgb.LGBMRanker(n_estimators=50)
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False, eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 25) self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6333) self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6333)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6048) self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6048)
def test_regression_with_custom_objective(self): def test_regression_with_custom_objective(self):
def objective_ls(y_true, y_pred):
grad = (y_pred - y_true)
hess = np.ones(len(y_true))
return grad, hess
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls) gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = mean_squared_error(y_test, gbm.predict(X_test)) ret = mean_squared_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 100) self.assertLess(ret, 7.0)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5) self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
def test_binary_classification_with_custom_objective(self): def test_binary_classification_with_custom_objective(self):
def logregobj(y_true, y_pred):
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
grad = y_pred - y_true
hess = y_pred * (1.0 - y_pred)
return grad, hess
def binary_error(y_test, y_pred):
return np.mean([int(p > 0.5) != y for y, p in zip(y_test, y_pred)])
X, y = load_digits(2, True) X, y = load_digits(2, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj) gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False) gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = binary_error(y_test, gbm.predict(X_test)) ret = binary_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.1) self.assertLess(ret, 0.05)
def test_dart(self): def test_dart(self):
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(boosting_type='dart') gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
gbm.fit(X_train, y_train) gbm.fit(X_train, y_train)
self.assertLessEqual(gbm.score(X_train, y_train), 1.) score = gbm.score(X_test, y_test)
self.assertGreaterEqual(score, 0.8)
self.assertLessEqual(score, 1.)
def test_grid_search(self): def test_grid_search(self):
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {'boosting_type': ['dart', 'gbdt'], params = {'boosting_type': ['dart', 'gbdt'],
'n_estimators': [5, 8], 'n_estimators': [5, 8],
'drop_rate': [0.05, 0.1]} 'drop_rate': [0.05, 0.1]}
gbm = GridSearchCV(lgb.LGBMRegressor(), params, cv=3) grid = GridSearchCV(lgb.LGBMRegressor(n_estimators=10), params, cv=3)
gbm.fit(X_train, y_train) grid.fit(X, y)
self.assertIn(gbm.best_params_['n_estimators'], [5, 8]) self.assertIn(grid.best_params_['boosting_type'], ['dart', 'gbdt'])
self.assertIn(grid.best_params_['n_estimators'], [5, 8])
self.assertIn(grid.best_params_['drop_rate'], [0.05, 0.1])
self.assertLess(grid.best_score_, 0.3)
def test_clone_and_property(self): def test_clone_and_property(self):
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
gbm = lgb.LGBMRegressor(n_estimators=100, silent=True) gbm.fit(X, y, verbose=False)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
gbm_clone = clone(gbm) gbm_clone = clone(gbm)
self.assertIsInstance(gbm.booster_, lgb.Booster) self.assertIsInstance(gbm.booster_, lgb.Booster)
self.assertIsInstance(gbm.feature_importances_, np.ndarray) self.assertIsInstance(gbm.feature_importances_, np.ndarray)
X, y = load_digits(2, True) X, y = load_digits(2, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
clf = lgb.LGBMClassifier() clf.fit(X, y, verbose=False)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
self.assertListEqual(sorted(clf.classes_), [0, 1]) self.assertListEqual(sorted(clf.classes_), [0, 1])
self.assertEqual(clf.n_classes_, 2) self.assertEqual(clf.n_classes_, 2)
self.assertIsInstance(clf.booster_, lgb.Booster) self.assertIsInstance(clf.booster_, lgb.Booster)
...@@ -177,15 +198,15 @@ class TestSklearn(unittest.TestCase): ...@@ -177,15 +198,15 @@ class TestSklearn(unittest.TestCase):
np.testing.assert_allclose(pred_origin, pred_pickle) np.testing.assert_allclose(pred_origin, pred_pickle)
def test_feature_importances_single_leaf(self): def test_feature_importances_single_leaf(self):
clf = lgb.LGBMClassifier(n_estimators=100)
data = load_iris() data = load_iris()
clf = lgb.LGBMClassifier(n_estimators=10)
clf.fit(data.data, data.target) clf.fit(data.data, data.target)
importances = clf.feature_importances_ importances = clf.feature_importances_
self.assertEqual(len(importances), 4) self.assertEqual(len(importances), 4)
def test_feature_importances_type(self): def test_feature_importances_type(self):
clf = lgb.LGBMClassifier(n_estimators=100)
data = load_iris() data = load_iris()
clf = lgb.LGBMClassifier(n_estimators=10)
clf.fit(data.data, data.target) clf.fit(data.data, data.target)
clf.set_params(importance_type='split') clf.set_params(importance_type='split')
importances_split = clf.feature_importances_ importances_split = clf.feature_importances_
...@@ -237,21 +258,21 @@ class TestSklearn(unittest.TestCase): ...@@ -237,21 +258,21 @@ class TestSklearn(unittest.TestCase):
X[cat_cols_actual] = X[cat_cols_actual].astype('category') X[cat_cols_actual] = X[cat_cols_actual].astype('category')
X_test[cat_cols_actual] = X_test[cat_cols_actual].astype('category') X_test[cat_cols_actual] = X_test[cat_cols_actual].astype('category')
cat_values = [X[col].cat.categories.tolist() for col in cat_cols_to_store] cat_values = [X[col].cat.categories.tolist() for col in cat_cols_to_store]
gbm0 = lgb.sklearn.LGBMClassifier().fit(X, y) gbm0 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
pred0 = gbm0.predict(X_test, raw_score=True) pred0 = gbm0.predict(X_test, raw_score=True)
pred_prob = gbm0.predict_proba(X_test)[:, 1] pred_prob = gbm0.predict_proba(X_test)[:, 1]
gbm1 = lgb.sklearn.LGBMClassifier().fit(X, pd.Series(y), categorical_feature=[0]) gbm1 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, pd.Series(y), categorical_feature=[0])
pred1 = gbm1.predict(X_test, raw_score=True) pred1 = gbm1.predict(X_test, raw_score=True)
gbm2 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A']) gbm2 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A'])
pred2 = gbm2.predict(X_test, raw_score=True) pred2 = gbm2.predict(X_test, raw_score=True)
gbm3 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A', 'B', 'C', 'D']) gbm3 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
pred3 = gbm3.predict(X_test, raw_score=True) pred3 = gbm3.predict(X_test, raw_score=True)
gbm3.booster_.save_model('categorical.model') gbm3.booster_.save_model('categorical.model')
gbm4 = lgb.Booster(model_file='categorical.model') gbm4 = lgb.Booster(model_file='categorical.model')
pred4 = gbm4.predict(X_test) pred4 = gbm4.predict(X_test)
gbm5 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['E']) gbm5 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A', 'B', 'C', 'D', 'E'])
pred5 = gbm5.predict(X_test, raw_score=True) pred5 = gbm5.predict(X_test, raw_score=True)
gbm6 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=[]) gbm6 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=[])
pred6 = gbm6.predict(X_test, raw_score=True) pred6 = gbm6.predict(X_test, raw_score=True)
self.assertRaises(AssertionError, self.assertRaises(AssertionError,
np.testing.assert_allclose, np.testing.assert_allclose,
...@@ -289,7 +310,7 @@ class TestSklearn(unittest.TestCase): ...@@ -289,7 +310,7 @@ class TestSklearn(unittest.TestCase):
if pd.__version__ >= '0.24.0': if pd.__version__ >= '0.24.0':
for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]): for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
self.assertTrue(pd.api.types.is_sparse(dtype)) self.assertTrue(pd.api.types.is_sparse(dtype))
gbm = lgb.sklearn.LGBMClassifier().fit(X, y) gbm = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
pred_sparse = gbm.predict(X_test, raw_score=True) pred_sparse = gbm.predict(X_test, raw_score=True)
if hasattr(X_test, 'sparse'): if hasattr(X_test, 'sparse'):
pred_dense = gbm.predict(X_test.sparse.to_dense(), raw_score=True) pred_dense = gbm.predict(X_test.sparse.to_dense(), raw_score=True)
...@@ -298,6 +319,7 @@ class TestSklearn(unittest.TestCase): ...@@ -298,6 +319,7 @@ class TestSklearn(unittest.TestCase):
np.testing.assert_allclose(pred_sparse, pred_dense) np.testing.assert_allclose(pred_sparse, pred_dense)
def test_predict(self): def test_predict(self):
# With default params
iris = load_iris() iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
test_size=0.2, random_state=42) test_size=0.2, random_state=42)
...@@ -356,14 +378,8 @@ class TestSklearn(unittest.TestCase): ...@@ -356,14 +378,8 @@ class TestSklearn(unittest.TestCase):
self.assertIn('l2', gbm.evals_result_['valid_1']) self.assertIn('l2', gbm.evals_result_['valid_1'])
def test_metrics(self): def test_metrics(self):
def custom_obj(y_true, y_pred):
return np.zeros(y_true.shape), np.zeros(y_true.shape)
def custom_metric(y_true, y_pred):
return 'error', 0, False
X, y = load_boston(True) X, y = load_boston(True)
params = {'n_estimators': 5, 'verbose': -1} params = {'n_estimators': 2, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False} params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
# no custom objective, no custom metric # no custom objective, no custom metric
...@@ -441,43 +457,43 @@ class TestSklearn(unittest.TestCase): ...@@ -441,43 +457,43 @@ class TestSklearn(unittest.TestCase):
# custom objective, no custom metric # custom objective, no custom metric
# default regression metric for custom objective # default regression metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, **params).fit(**params_fit) gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, **params).fit(**params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('l2', gbm.evals_result_['training']) self.assertIn('l2', gbm.evals_result_['training'])
# non-default regression metric for custom objective # non-default regression metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric='mape', **params).fit(**params_fit) gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric='mape', **params).fit(**params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
# multiple regression metrics for custom objective # multiple regression metrics for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric=['l1', 'gamma'], gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric=['l1', 'gamma'],
**params).fit(**params_fit) **params).fit(**params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training']) self.assertIn('gamma', gbm.evals_result_['training'])
# no metric # no metric
gbm = lgb.LGBMRegressor(objective=custom_obj, metric='None', gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric='None',
**params).fit(**params_fit) **params).fit(**params_fit)
self.assertIs(gbm.evals_result_, None) self.assertIs(gbm.evals_result_, None)
# default regression metric with non-default metric in eval_metric for custom objective # default regression metric with non-default metric in eval_metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, gbm = lgb.LGBMRegressor(objective=custom_dummy_obj,
**params).fit(eval_metric='mape', **params_fit) **params).fit(eval_metric='mape', **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l2', gbm.evals_result_['training']) self.assertIn('l2', gbm.evals_result_['training'])
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
# non-default regression metric with metric in eval_metric for custom objective # non-default regression metric with metric in eval_metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric='mape', gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric='mape',
**params).fit(eval_metric='gamma', **params_fit) **params).fit(eval_metric='gamma', **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training']) self.assertIn('gamma', gbm.evals_result_['training'])
# multiple regression metrics with metric in eval_metric for custom objective # multiple regression metrics with metric in eval_metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric=['l1', 'gamma'], gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric=['l1', 'gamma'],
**params).fit(eval_metric='l2', **params_fit) **params).fit(eval_metric='l2', **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3) self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
...@@ -485,7 +501,7 @@ class TestSklearn(unittest.TestCase): ...@@ -485,7 +501,7 @@ class TestSklearn(unittest.TestCase):
self.assertIn('l2', gbm.evals_result_['training']) self.assertIn('l2', gbm.evals_result_['training'])
# multiple regression metrics with multiple metrics in eval_metric for custom objective # multiple regression metrics with multiple metrics in eval_metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric=['l1', 'gamma'], gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric=['l1', 'gamma'],
**params).fit(eval_metric=['l2', 'mape'], **params_fit) **params).fit(eval_metric=['l2', 'mape'], **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 4) self.assertEqual(len(gbm.evals_result_['training']), 4)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
...@@ -495,21 +511,21 @@ class TestSklearn(unittest.TestCase): ...@@ -495,21 +511,21 @@ class TestSklearn(unittest.TestCase):
# no custom objective, custom metric # no custom objective, custom metric
# default metric with custom metric # default metric with custom metric
gbm = lgb.LGBMRegressor(**params).fit(eval_metric=custom_metric, **params_fit) gbm = lgb.LGBMRegressor(**params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l2', gbm.evals_result_['training']) self.assertIn('l2', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# non-default metric with custom metric # non-default metric with custom metric
gbm = lgb.LGBMRegressor(metric='mape', gbm = lgb.LGBMRegressor(metric='mape',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# multiple metrics with custom metric # multiple metrics with custom metric
gbm = lgb.LGBMRegressor(metric=['l1', 'gamma'], gbm = lgb.LGBMRegressor(metric=['l1', 'gamma'],
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3) self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training']) self.assertIn('gamma', gbm.evals_result_['training'])
...@@ -517,27 +533,27 @@ class TestSklearn(unittest.TestCase): ...@@ -517,27 +533,27 @@ class TestSklearn(unittest.TestCase):
# custom metric (disable default metric) # custom metric (disable default metric)
gbm = lgb.LGBMRegressor(metric='None', gbm = lgb.LGBMRegressor(metric='None',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# default metric for non-default objective with custom metric # default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', gbm = lgb.LGBMRegressor(objective='regression_l1',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# non-default metric for non-default objective with custom metric # non-default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape', gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# multiple metrics for non-default objective with custom metric # multiple metrics for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'], gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'],
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3) self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training']) self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training']) self.assertIn('gamma', gbm.evals_result_['training'])
...@@ -545,27 +561,27 @@ class TestSklearn(unittest.TestCase): ...@@ -545,27 +561,27 @@ class TestSklearn(unittest.TestCase):
# custom metric (disable default metric for non-default objective) # custom metric (disable default metric for non-default objective)
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None', gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# custom objective, custom metric # custom objective, custom metric
# custom metric for custom objective # custom metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, gbm = lgb.LGBMRegressor(objective=custom_dummy_obj,
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# non-default regression metric with custom metric for custom objective # non-default regression metric with custom metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric='mape', gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric='mape',
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2) self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training']) self.assertIn('error', gbm.evals_result_['training'])
# multiple regression metrics with custom metric for custom objective # multiple regression metrics with custom metric for custom objective
gbm = lgb.LGBMRegressor(objective=custom_obj, metric=['l2', 'mape'], gbm = lgb.LGBMRegressor(objective=custom_dummy_obj, metric=['l2', 'mape'],
**params).fit(eval_metric=custom_metric, **params_fit) **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3) self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l2', gbm.evals_result_['training']) self.assertIn('l2', gbm.evals_result_['training'])
self.assertIn('mape', gbm.evals_result_['training']) self.assertIn('mape', gbm.evals_result_['training'])
...@@ -608,13 +624,13 @@ class TestSklearn(unittest.TestCase): ...@@ -608,13 +624,13 @@ class TestSklearn(unittest.TestCase):
self.assertIn('binary_error', gbm.evals_result_['training']) self.assertIn('binary_error', gbm.evals_result_['training'])
# invalid multiclass metric is replaced with binary alternative for custom objective # invalid multiclass metric is replaced with binary alternative for custom objective
gbm = lgb.LGBMClassifier(objective=custom_obj, gbm = lgb.LGBMClassifier(objective=custom_dummy_obj,
**params).fit(eval_metric='multi_logloss', **params_fit) **params).fit(eval_metric='multi_logloss', **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1) self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('binary_logloss', gbm.evals_result_['training']) self.assertIn('binary_logloss', gbm.evals_result_['training'])
def test_inf_handle(self): def test_inf_handle(self):
nrows = 1000 nrows = 100
ncols = 10 ncols = 10
X = np.random.randn(nrows, ncols) X = np.random.randn(nrows, ncols)
y = np.random.randn(nrows) + np.full(nrows, 1e30) y = np.random.randn(nrows) + np.full(nrows, 1e30)
...@@ -626,7 +642,7 @@ class TestSklearn(unittest.TestCase): ...@@ -626,7 +642,7 @@ class TestSklearn(unittest.TestCase):
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf) np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf)
def test_nan_handle(self): def test_nan_handle(self):
nrows = 1000 nrows = 100
ncols = 10 ncols = 10
X = np.random.randn(nrows, ncols) X = np.random.randn(nrows, ncols)
y = np.random.randn(nrows) + np.full(nrows, 1e30) y = np.random.randn(nrows) + np.full(nrows, 1e30)
...@@ -639,12 +655,6 @@ class TestSklearn(unittest.TestCase): ...@@ -639,12 +655,6 @@ class TestSklearn(unittest.TestCase):
def test_first_metric_only(self): def test_first_metric_only(self):
def decreasing_metric(y_true, y_pred):
return ('decreasing_metric', next(decreasing_generator), False)
def constant_metric(y_true, y_pred):
return ('constant_metric', 0.0, False)
def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_only): def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_only):
params['first_metric_only'] = first_metric_only params['first_metric_only'] = first_metric_only
gbm = lgb.LGBMRegressor(**params).fit(**params_fit) gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
...@@ -660,10 +670,9 @@ class TestSklearn(unittest.TestCase): ...@@ -660,10 +670,9 @@ class TestSklearn(unittest.TestCase):
if eval_set_name != 'training' if eval_set_name != 'training'
and assumed_iteration != gbm.n_estimators else 0) and assumed_iteration != gbm.n_estimators else 0)
self.assertEqual(expected, actual) self.assertEqual(expected, actual)
self.assertEqual(assumed_iteration if eval_set_name != 'training' else params['n_estimators'], self.assertEqual(assumed_iteration if eval_set_name != 'training' else gbm.n_estimators,
gbm.best_iteration_) gbm.best_iteration_)
decreasing_generator = itertools.count(0, -1)
X, y = load_boston(True) X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72) X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72)
...@@ -697,11 +706,11 @@ class TestSklearn(unittest.TestCase): ...@@ -697,11 +706,11 @@ class TestSklearn(unittest.TestCase):
params_fit['eval_metric'] = lambda preds, train_data: [decreasing_metric(preds, train_data), params_fit['eval_metric'] = lambda preds, train_data: [decreasing_metric(preds, train_data),
constant_metric(preds, train_data)] constant_metric(preds, train_data)]
params_fit['eval_set'] = (X_test1, y_test1) params_fit['eval_set'] = (X_test1, y_test1)
fit_and_check(['valid_0'], ['decreasing_metric', 'constant_metric'], 1, False) fit_and_check(['valid_0'], ['decreasing_metric', 'error'], 1, False)
fit_and_check(['valid_0'], ['decreasing_metric', 'constant_metric'], 30, True) fit_and_check(['valid_0'], ['decreasing_metric', 'error'], 30, True)
params_fit['eval_metric'] = lambda preds, train_data: [constant_metric(preds, train_data), params_fit['eval_metric'] = lambda preds, train_data: [constant_metric(preds, train_data),
decreasing_metric(preds, train_data)] decreasing_metric(preds, train_data)]
fit_and_check(['valid_0'], ['decreasing_metric', 'constant_metric'], 1, True) fit_and_check(['valid_0'], ['decreasing_metric', 'error'], 1, True)
# single eval_set # single eval_set
params.pop('metric') params.pop('metric')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment