"vscode:/vscode.git/clone" did not exist on "9503d3f9da3a55cc52ef207e15fda7c9e1330c4d"
Unverified commit 1f1dc452, authored by Nikita Titov, committed by GitHub
Browse files

[tests][python] refined python tests (#2483)

* speed up tests

* more updates

* fixed pylint

* updated tests

* Update test_sklearn.py

* test that indices are sorted internally
parent 00d1e693
......@@ -69,7 +69,8 @@ def load_from_file(filename, reference):
LIB.LGBM_DatasetCreateFromFile(
c_str(filename),
c_str('max_bin=15'),
ref, ctypes.byref(handle))
ref,
ctypes.byref(handle))
print(LIB.LGBM_GetLastError())
num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
......@@ -88,8 +89,9 @@ def load_from_csr(filename, reference):
label = []
with open(filename, 'r') as inp:
for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
values = line.split('\t')
data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data)
label = np.array(label, dtype=np.float32)
csr = sparse.csr_matrix(mat)
......@@ -124,8 +126,9 @@ def load_from_csc(filename, reference):
label = []
with open(filename, 'r') as inp:
for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
values = line.split('\t')
data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data)
label = np.array(label, dtype=np.float32)
csr = sparse.csc_matrix(mat)
......@@ -160,8 +163,9 @@ def load_from_mat(filename, reference):
label = []
with open(filename, 'r') as inp:
for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
values = line.split('\t')
data.append([float(x) for x in values[1:]])
label.append(float(values[0]))
mat = np.array(data)
data = np.array(mat.reshape(mat.size), copy=False)
label = np.array(label, dtype=np.float32)
......@@ -222,7 +226,7 @@ def test_booster():
ctypes.byref(booster))
LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0)
for i in range(1, 101):
for i in range(1, 51):
LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
result = np.array([0.0], dtype=np.float64)
out_len = ctypes.c_ulong(0)
......@@ -260,7 +264,7 @@ def test_booster():
mat.shape[1],
1,
1,
50,
25,
c_str(''),
ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
......@@ -270,7 +274,7 @@ def test_booster():
'../../examples/binary_classification/binary.test')),
0,
0,
50,
25,
c_str(''),
c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2)
......@@ -31,13 +31,13 @@ class TestBasic(unittest.TestCase):
bst = lgb.Booster(params, train_data)
bst.add_valid(valid_data, "valid_1")
for i in range(30):
for i in range(20):
bst.update()
if i % 10 == 0:
print(bst.eval_train(), bst.eval_valid())
self.assertEqual(bst.current_iteration(), 30)
self.assertEqual(bst.num_trees(), 30)
self.assertEqual(bst.current_iteration(), 20)
self.assertEqual(bst.num_trees(), 20)
self.assertEqual(bst.num_model_per_iteration(), 1)
bst.save_model("model.txt")
......@@ -48,26 +48,20 @@ class TestBasic(unittest.TestCase):
dump_svmlight_file(X_test, y_test, f)
pred_from_file = bst.predict(tname)
os.remove(tname)
self.assertEqual(len(pred_from_matr), len(pred_from_file))
for preds in zip(pred_from_matr, pred_from_file):
self.assertAlmostEqual(*preds, places=15)
np.testing.assert_allclose(pred_from_matr, pred_from_file)
# check saved model persistence
bst = lgb.Booster(params, model_file="model.txt")
os.remove("model.txt")
pred_from_model_file = bst.predict(X_test)
self.assertEqual(len(pred_from_matr), len(pred_from_model_file))
for preds in zip(pred_from_matr, pred_from_model_file):
# we need to check the consistency of model file here, so test for exact equal
self.assertEqual(*preds)
# we need to check the consistency of model file here, so test for exact equal
np.testing.assert_array_equal(pred_from_matr, pred_from_model_file)
# check early stopping is working. Make it stop very early, so the scores should be very close to zero
pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
pred_early_stopping = bst.predict(X_test, **pred_parameter)
self.assertEqual(len(pred_from_matr), len(pred_early_stopping))
for preds in zip(pred_early_stopping, pred_from_matr):
# scores likely to be different, but prediction should still be the same
self.assertEqual(preds[0] > 0, preds[1] > 0)
# scores likely to be different, but prediction should still be the same
np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping))
# test that shape is checked during prediction
bad_X_test = X_test[:, 1:]
......@@ -97,7 +91,6 @@ class TestBasic(unittest.TestCase):
train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100})
valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100})
train_data.construct()
valid_data.construct()
......@@ -108,23 +101,23 @@ class TestBasic(unittest.TestCase):
'../../examples/lambdarank/rank.train.query'))
lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
self.assertEqual(len(lgb_train.get_group()), 201)
subset = lgb_train.subset(list(lgb.compat.range_(10))).construct()
subset = lgb_train.subset(list(range(10))).construct()
subset_group = subset.get_group()
self.assertEqual(len(subset_group), 2)
self.assertEqual(subset_group[0], 1)
self.assertEqual(subset_group[1], 9)
def test_add_features_throws_if_num_data_unequal(self):
X1 = np.random.random((1000, 1))
X2 = np.random.random((100, 1))
X1 = np.random.random((100, 1))
X2 = np.random.random((10, 1))
d1 = lgb.Dataset(X1).construct()
d2 = lgb.Dataset(X2).construct()
with self.assertRaises(lgb.basic.LightGBMError):
d1.add_features_from(d2)
def test_add_features_throws_if_datasets_unconstructed(self):
X1 = np.random.random((1000, 1))
X2 = np.random.random((1000, 1))
X1 = np.random.random((100, 1))
X2 = np.random.random((100, 1))
with self.assertRaises(ValueError):
d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2)
......@@ -139,7 +132,8 @@ class TestBasic(unittest.TestCase):
d1.add_features_from(d2)
def test_add_features_equal_data_on_alternating_used_unused(self):
X = np.random.random((1000, 5))
self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)]
for j in range(1, 5):
......@@ -162,7 +156,8 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt)
def test_add_features_same_booster_behaviour(self):
X = np.random.random((1000, 5))
self.maxDiff = None
X = np.random.random((100, 5))
X[:, [1, 3]] = 0
names = ['col_%d' % i for i in range(5)]
for j in range(1, 5):
......@@ -170,7 +165,7 @@ class TestBasic(unittest.TestCase):
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
d1.add_features_from(d2)
d = lgb.Dataset(X, feature_name=names).construct()
y = np.random.random(1000)
y = np.random.random(100)
d1.set_label(y)
d.set_label(y)
b1 = lgb.Booster(train_set=d1)
......@@ -191,7 +186,7 @@ class TestBasic(unittest.TestCase):
self.assertEqual(dtxt, d1txt)
def test_get_feature_penalty_and_monotone_constraints(self):
X = np.random.random((1000, 1))
X = np.random.random((100, 1))
d = lgb.Dataset(X, params={'feature_penalty': [0.5],
'monotone_constraints': [1]}).construct()
np.testing.assert_allclose(d.get_feature_penalty(), [0.5])
......@@ -201,7 +196,7 @@ class TestBasic(unittest.TestCase):
self.assertIsNone(d.get_monotone_constraints())
def test_add_features_feature_penalty(self):
X = np.random.random((1000, 2))
X = np.random.random((100, 2))
test_cases = [
(None, None, None),
([0.5], None, [0.5, 1]),
......@@ -220,7 +215,7 @@ class TestBasic(unittest.TestCase):
np.testing.assert_allclose(actual, expected)
def test_add_features_monotone_types(self):
X = np.random.random((1000, 2))
X = np.random.random((100, 2))
test_cases = [
(None, None, None),
([1], None, [1, 0]),
......@@ -239,9 +234,9 @@ class TestBasic(unittest.TestCase):
np.testing.assert_array_equal(actual, expected)
def test_cegb_affects_behavior(self):
X = np.random.random((1000, 5))
X = np.random.random((100, 5))
X[:, [1, 3]] = 0
y = np.random.random(1000)
y = np.random.random(100)
names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y)
......@@ -269,9 +264,9 @@ class TestBasic(unittest.TestCase):
self.assertNotEqual(basetxt, casetxt)
def test_cegb_scaling_equalities(self):
X = np.random.random((1000, 5))
X = np.random.random((100, 5))
X[:, [1, 3]] = 0
y = np.random.random(1000)
y = np.random.random(100)
names = ['col_%d' % i for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y)
......@@ -298,9 +293,9 @@ class TestBasic(unittest.TestCase):
with tempfile.NamedTemporaryFile() as f:
p2name = f.name
booster2.save_model(p2name)
self.maxDiff = None
with open(p2name, 'rt') as f:
p2txt = f.read()
self.maxDiff = None
self.assertEqual(p1txt, p2txt)
def test_consistent_state_for_dataset_fields(self):
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment