Unverified commit 1f1dc452, authored by Nikita Titov and committed by GitHub
Browse files

[tests][python] refined python tests (#2483)

* speed up tests

* more updates

* fixed pylint

* updated tests

* Update test_sklearn.py

* test that indices are sorted internally
parent 00d1e693
...@@ -69,7 +69,8 @@ def load_from_file(filename, reference): ...@@ -69,7 +69,8 @@ def load_from_file(filename, reference):
LIB.LGBM_DatasetCreateFromFile( LIB.LGBM_DatasetCreateFromFile(
c_str(filename), c_str(filename),
c_str('max_bin=15'), c_str('max_bin=15'),
ref, ctypes.byref(handle)) ref,
ctypes.byref(handle))
print(LIB.LGBM_GetLastError()) print(LIB.LGBM_GetLastError())
num_data = ctypes.c_long() num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
...@@ -88,8 +89,9 @@ def load_from_csr(filename, reference): ...@@ -88,8 +89,9 @@ def load_from_csr(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
csr = sparse.csr_matrix(mat) csr = sparse.csr_matrix(mat)
...@@ -124,8 +126,9 @@ def load_from_csc(filename, reference): ...@@ -124,8 +126,9 @@ def load_from_csc(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
csr = sparse.csc_matrix(mat) csr = sparse.csc_matrix(mat)
...@@ -160,8 +163,9 @@ def load_from_mat(filename, reference): ...@@ -160,8 +163,9 @@ def load_from_mat(filename, reference):
label = [] label = []
with open(filename, 'r') as inp: with open(filename, 'r') as inp:
for line in inp.readlines(): for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]]) values = line.split('\t')
label.append(float(line.split('\t')[0])) data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data) mat = np.array(data)
data = np.array(mat.reshape(mat.size), copy=False) data = np.array(mat.reshape(mat.size), copy=False)
label = np.array(label, dtype=np.float32) label = np.array(label, dtype=np.float32)
...@@ -222,7 +226,7 @@ def test_booster(): ...@@ -222,7 +226,7 @@ def test_booster():
ctypes.byref(booster)) ctypes.byref(booster))
LIB.LGBM_BoosterAddValidData(booster, test) LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0) is_finished = ctypes.c_int(0)
for i in range(1, 101): for i in range(1, 51):
LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished)) LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
result = np.array([0.0], dtype=np.float64) result = np.array([0.0], dtype=np.float64)
out_len = ctypes.c_ulong(0) out_len = ctypes.c_ulong(0)
...@@ -260,7 +264,7 @@ def test_booster(): ...@@ -260,7 +264,7 @@ def test_booster():
mat.shape[1], mat.shape[1],
1, 1,
1, 1,
50, 25,
c_str(''), c_str(''),
ctypes.byref(num_preb), ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double))) preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
...@@ -270,7 +274,7 @@ def test_booster(): ...@@ -270,7 +274,7 @@ def test_booster():
'../../examples/binary_classification/binary.test')), '../../examples/binary_classification/binary.test')),
0, 0,
0, 0,
50, 25,
c_str(''), c_str(''),
c_str('preb.txt')) c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2) LIB.LGBM_BoosterFree(booster2)
...@@ -31,13 +31,13 @@ class TestBasic(unittest.TestCase): ...@@ -31,13 +31,13 @@ class TestBasic(unittest.TestCase):
bst = lgb.Booster(params, train_data) bst = lgb.Booster(params, train_data)
bst.add_valid(valid_data, "valid_1") bst.add_valid(valid_data, "valid_1")
for i in range(30): for i in range(20):
bst.update() bst.update()
if i % 10 == 0: if i % 10 == 0:
print(bst.eval_train(), bst.eval_valid()) print(bst.eval_train(), bst.eval_valid())
self.assertEqual(bst.current_iteration(), 30) self.assertEqual(bst.current_iteration(), 20)
self.assertEqual(bst.num_trees(), 30) self.assertEqual(bst.num_trees(), 20)
self.assertEqual(bst.num_model_per_iteration(), 1) self.assertEqual(bst.num_model_per_iteration(), 1)
bst.save_model("model.txt") bst.save_model("model.txt")
...@@ -48,26 +48,20 @@ class TestBasic(unittest.TestCase): ...@@ -48,26 +48,20 @@ class TestBasic(unittest.TestCase):
dump_svmlight_file(X_test, y_test, f) dump_svmlight_file(X_test, y_test, f)
pred_from_file = bst.predict(tname) pred_from_file = bst.predict(tname)
os.remove(tname) os.remove(tname)
self.assertEqual(len(pred_from_matr), len(pred_from_file)) np.testing.assert_allclose(pred_from_matr, pred_from_file)
for preds in zip(pred_from_matr, pred_from_file):
self.assertAlmostEqual(*preds, places=15)
# check saved model persistence # check saved model persistence
bst = lgb.Booster(params, model_file="model.txt") bst = lgb.Booster(params, model_file="model.txt")
os.remove("model.txt") os.remove("model.txt")
pred_from_model_file = bst.predict(X_test) pred_from_model_file = bst.predict(X_test)
self.assertEqual(len(pred_from_matr), len(pred_from_model_file))
for preds in zip(pred_from_matr, pred_from_model_file):
# we need to check the consistency of model file here, so test for exact equal # we need to check the consistency of model file here, so test for exact equal
self.assertEqual(*preds) np.testing.assert_array_equal(pred_from_matr, pred_from_model_file)
# check early stopping is working. Make it stop very early, so the scores should be very close to zero # check early stopping is working. Make it stop very early, so the scores should be very close to zero
pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5} pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
pred_early_stopping = bst.predict(X_test, **pred_parameter) pred_early_stopping = bst.predict(X_test, **pred_parameter)
self.assertEqual(len(pred_from_matr), len(pred_early_stopping))
for preds in zip(pred_early_stopping, pred_from_matr):
# scores likely to be different, but prediction should still be the same # scores likely to be different, but prediction should still be the same
self.assertEqual(preds[0] > 0, preds[1] > 0) np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping))
# test that shape is checked during prediction # test that shape is checked during prediction
bad_X_test = X_test[:, 1:] bad_X_test = X_test[:, 1:]
...@@ -97,7 +91,6 @@ class TestBasic(unittest.TestCase): ...@@ -97,7 +91,6 @@ class TestBasic(unittest.TestCase):
train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100}) train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100})
valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100}) valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100})
train_data.construct() train_data.construct()
valid_data.construct() valid_data.construct()
...@@ -108,23 +101,23 @@ class TestBasic(unittest.TestCase): ...@@ -108,23 +101,23 @@ class TestBasic(unittest.TestCase):
'../../examples/lambdarank/rank.train.query')) '../../examples/lambdarank/rank.train.query'))
lgb_train = lgb.Dataset(X_train, y_train, group=q_train) lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
self.assertEqual(len(lgb_train.get_group()), 201) self.assertEqual(len(lgb_train.get_group()), 201)
subset = lgb_train.subset(list(lgb.compat.range_(10))).construct() subset = lgb_train.subset(list(range(10))).construct()
subset_group = subset.get_group() subset_group = subset.get_group()
self.assertEqual(len(subset_group), 2) self.assertEqual(len(subset_group), 2)
self.assertEqual(subset_group[0], 1) self.assertEqual(subset_group[0], 1)
self.assertEqual(subset_group[1], 9) self.assertEqual(subset_group[1], 9)
def test_add_features_throws_if_num_data_unequal(self): def test_add_features_throws_if_num_data_unequal(self):
X1 = np.random.random((1000, 1)) X1 = np.random.random((100, 1))
X2 = np.random.random((100, 1)) X2 = np.random.random((10, 1))
d1 = lgb.Dataset(X1).construct() d1 = lgb.Dataset(X1).construct()
d2 = lgb.Dataset(X2).construct() d2 = lgb.Dataset(X2).construct()
with self.assertRaises(lgb.basic.LightGBMError): with self.assertRaises(lgb.basic.LightGBMError):
d1.add_features_from(d2) d1.add_features_from(d2)
def test_add_features_throws_if_datasets_unconstructed(self): def test_add_features_throws_if_datasets_unconstructed(self):
X1 = np.random.random((1000, 1)) X1 = np.random.random((100, 1))
X2 = np.random.random((1000, 1)) X2 = np.random.random((100, 1))
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
d1 = lgb.Dataset(X1) d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2) d2 = lgb.Dataset(X2)
...@@ -139,7 +132,8 @@ class TestBasic(unittest.TestCase): ...@@ -139,7 +132,8 @@ class TestBasic(unittest.TestCase):
d1.add_features_from(d2) d1.add_features_from(d2)
def test_add_features_equal_data_on_alternating_used_unused(self): def test_add_features_equal_data_on_alternating_used_unused(self):
X = np.random.random((1000, 5)) self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
...@@ -162,7 +156,8 @@ class TestBasic(unittest.TestCase): ...@@ -162,7 +156,8 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt) self.assertEqual(dtxt, d1txt)
def test_add_features_same_booster_behaviour(self): def test_add_features_same_booster_behaviour(self):
X = np.random.random((1000, 5)) self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
...@@ -170,7 +165,7 @@ class TestBasic(unittest.TestCase): ...@@ -170,7 +165,7 @@ class TestBasic(unittest.TestCase):
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
d1.add_features_from(d2) d1.add_features_from(d2)
d = lgb.Dataset(X, feature_name=names).construct() d = lgb.Dataset(X, feature_name=names).construct()
y = np.random.random(1000) y = np.random.random(100)
d1.set_label(y) d1.set_label(y)
d.set_label(y) d.set_label(y)
b1 = lgb.Booster(train_set=d1) b1 = lgb.Booster(train_set=d1)
...@@ -191,7 +186,7 @@ class TestBasic(unittest.TestCase): ...@@ -191,7 +186,7 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt) self.assertEqual(dtxt, d1txt)
def test_get_feature_penalty_and_monotone_constraints(self): def test_get_feature_penalty_and_monotone_constraints(self):
X = np.random.random((1000, 1)) X = np.random.random((100, 1))
d = lgb.Dataset(X, params={'feature_penalty': [0.5], d = lgb.Dataset(X, params={'feature_penalty': [0.5],
'monotone_constraints': [1]}).construct() 'monotone_constraints': [1]}).construct()
np.testing.assert_allclose(d.get_feature_penalty(), [0.5]) np.testing.assert_allclose(d.get_feature_penalty(), [0.5])
...@@ -201,7 +196,7 @@ class TestBasic(unittest.TestCase): ...@@ -201,7 +196,7 @@ class TestBasic(unittest.TestCase):
self.assertIsNone(d.get_monotone_constraints()) self.assertIsNone(d.get_monotone_constraints())
def test_add_features_feature_penalty(self): def test_add_features_feature_penalty(self):
X = np.random.random((1000, 2)) X = np.random.random((100, 2))
test_cases = [ test_cases = [
(None, None, None), (None, None, None),
([0.5], None, [0.5, 1]), ([0.5], None, [0.5, 1]),
...@@ -220,7 +215,7 @@ class TestBasic(unittest.TestCase): ...@@ -220,7 +215,7 @@ class TestBasic(unittest.TestCase):
np.testing.assert_allclose(actual, expected) np.testing.assert_allclose(actual, expected)
def test_add_features_monotone_types(self): def test_add_features_monotone_types(self):
X = np.random.random((1000, 2)) X = np.random.random((100, 2))
test_cases = [ test_cases = [
(None, None, None), (None, None, None),
([1], None, [1, 0]), ([1], None, [1, 0]),
...@@ -239,9 +234,9 @@ class TestBasic(unittest.TestCase): ...@@ -239,9 +234,9 @@ class TestBasic(unittest.TestCase):
np.testing.assert_array_equal(actual, expected) np.testing.assert_array_equal(actual, expected)
def test_cegb_affects_behavior(self): def test_cegb_affects_behavior(self):
X = np.random.random((1000, 5)) X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
y = np.random.random(1000) y = np.random.random(100)
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct() ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y) ds.set_label(y)
...@@ -269,9 +264,9 @@ class TestBasic(unittest.TestCase): ...@@ -269,9 +264,9 @@ class TestBasic(unittest.TestCase):
self.assertNotEqual(basetxt, casetxt) self.assertNotEqual(basetxt, casetxt)
def test_cegb_scaling_equalities(self): def test_cegb_scaling_equalities(self):
X = np.random.random((1000, 5)) X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
y = np.random.random(1000) y = np.random.random(100)
names = ['col_%d' % i for i in range(5)] names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct() ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y) ds.set_label(y)
...@@ -298,9 +293,9 @@ class TestBasic(unittest.TestCase): ...@@ -298,9 +293,9 @@ class TestBasic(unittest.TestCase):
with tempfile.NamedTemporaryFile() as f: with tempfile.NamedTemporaryFile() as f:
p2name = f.name p2name = f.name
booster2.save_model(p2name) booster2.save_model(p2name)
self.maxDiff = None
with open(p2name, 'rt') as f: with open(p2name, 'rt') as f:
p2txt = f.read() p2txt = f.read()
self.maxDiff = None
self.assertEqual(p1txt, p2txt) self.assertEqual(p1txt, p2txt)
def test_consistent_state_for_dataset_fields(self): def test_consistent_state_for_dataset_fields(self):
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment