Commit 8aa08c4a authored by Nikita Titov's avatar Nikita Titov Committed by Guolin Ke
Browse files

[tests] fixed and refactored some tests (#2035)

* fixed number of tests in pytest

* fixed data shape and removed unused code

* refactored tests

* hotfix

* hotfix
parent 219c943d
...@@ -103,9 +103,7 @@ class TestBasic(unittest.TestCase): ...@@ -103,9 +103,7 @@ class TestBasic(unittest.TestCase):
def test_add_features_throws_if_datasets_unconstructed(self): def test_add_features_throws_if_datasets_unconstructed(self):
X1 = np.random.random((1000, 1)) X1 = np.random.random((1000, 1))
X2 = np.random.random((100, 1)) X2 = np.random.random((1000, 1))
d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
d1 = lgb.Dataset(X1) d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2) d2 = lgb.Dataset(X2)
...@@ -122,7 +120,7 @@ class TestBasic(unittest.TestCase): ...@@ -122,7 +120,7 @@ class TestBasic(unittest.TestCase):
def test_add_features_equal_data_on_alternating_used_unused(self): def test_add_features_equal_data_on_alternating_used_unused(self):
X = np.random.random((1000, 5)) X = np.random.random((1000, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % (i,) for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct()
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
...@@ -145,7 +143,7 @@ class TestBasic(unittest.TestCase): ...@@ -145,7 +143,7 @@ class TestBasic(unittest.TestCase):
def test_add_features_same_booster_behaviour(self): def test_add_features_same_booster_behaviour(self):
X = np.random.random((1000, 5)) X = np.random.random((1000, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
names = ['col_%d' % (i,) for i in range(5)] names = ['col_%d' % i for i in range(5)]
for j in range(1, 5): for j in range(1, 5):
d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct()
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
...@@ -171,19 +169,15 @@ class TestBasic(unittest.TestCase): ...@@ -171,19 +169,15 @@ class TestBasic(unittest.TestCase):
d1txt = d1f.read() d1txt = d1f.read()
self.assertEqual(dtxt, d1txt) self.assertEqual(dtxt, d1txt)
def test_get_feature_penalty_and_monotone_constraints(self):
    """Values passed via Dataset params are retrievable; absent ones come back as None."""
    X = np.random.random((1000, 1))
    # Dataset constructed with both a feature penalty and a monotone constraint
    # must echo them back through the corresponding getters.
    constrained = lgb.Dataset(X, params={'feature_penalty': [0.5],
                                         'monotone_constraints': [1]}).construct()
    np.testing.assert_almost_equal(constrained.get_feature_penalty(), [0.5])
    np.testing.assert_array_equal(constrained.get_monotone_constraints(), [1])
    # A Dataset built without those params reports None for both.
    plain = lgb.Dataset(X).construct()
    self.assertIsNone(plain.get_feature_penalty())
    self.assertIsNone(plain.get_monotone_constraints())
def test_add_features_feature_penalty(self): def test_add_features_feature_penalty(self):
X = np.random.random((1000, 2)) X = np.random.random((1000, 2))
...@@ -193,21 +187,16 @@ class TestBasic(unittest.TestCase): ...@@ -193,21 +187,16 @@ class TestBasic(unittest.TestCase):
(None, [0.5], [1, 0.5]), (None, [0.5], [1, 0.5]),
([0.5], [0.5], [0.5, 0.5])] ([0.5], [0.5], [0.5, 0.5])]
for (p1, p2, expected) in test_cases: for (p1, p2, expected) in test_cases:
if p1 is not None: params1 = {'feature_penalty': p1} if p1 is not None else {}
params1 = {'feature_penalty': p1}
else:
params1 = {}
d1 = lgb.Dataset(X[:, 0].reshape((-1, 1)), params=params1).construct() d1 = lgb.Dataset(X[:, 0].reshape((-1, 1)), params=params1).construct()
if p2 is not None: params2 = {'feature_penalty': p2} if p2 is not None else {}
params2 = {'feature_penalty': p2}
else:
params2 = {}
d2 = lgb.Dataset(X[:, 1].reshape((-1, 1)), params=params2).construct() d2 = lgb.Dataset(X[:, 1].reshape((-1, 1)), params=params2).construct()
d1.add_features_from(d2) d1.add_features_from(d2)
actual = d1.get_feature_penalty() actual = d1.get_feature_penalty()
if isinstance(actual, np.ndarray): if expected is None:
actual = list(actual) self.assertIsNone(actual)
self.assertEqual(expected, actual) else:
np.testing.assert_almost_equal(actual, expected)
def test_add_features_monotone_types(self): def test_add_features_monotone_types(self):
X = np.random.random((1000, 2)) X = np.random.random((1000, 2))
...@@ -217,18 +206,13 @@ class TestBasic(unittest.TestCase): ...@@ -217,18 +206,13 @@ class TestBasic(unittest.TestCase):
(None, [1], [0, 1]), (None, [1], [0, 1]),
([1], [-1], [1, -1])] ([1], [-1], [1, -1])]
for (p1, p2, expected) in test_cases: for (p1, p2, expected) in test_cases:
if p1 is not None: params1 = {'monotone_constraints': p1} if p1 is not None else {}
params1 = {'monotone_constraints': p1}
else:
params1 = {}
d1 = lgb.Dataset(X[:, 0].reshape((-1, 1)), params=params1).construct() d1 = lgb.Dataset(X[:, 0].reshape((-1, 1)), params=params1).construct()
if p2 is not None: params2 = {'monotone_constraints': p2} if p2 is not None else {}
params2 = {'monotone_constraints': p2}
else:
params2 = {}
d2 = lgb.Dataset(X[:, 1].reshape((-1, 1)), params=params2).construct() d2 = lgb.Dataset(X[:, 1].reshape((-1, 1)), params=params2).construct()
d1.add_features_from(d2) d1.add_features_from(d2)
actual = d1.get_monotone_constraints() actual = d1.get_monotone_constraints()
if isinstance(actual, np.ndarray): if actual is None:
actual = list(actual) self.assertIsNone(actual)
self.assertEqual(expected, actual) else:
np.testing.assert_array_equal(actual, expected)
...@@ -766,57 +766,55 @@ class TestEngine(unittest.TestCase): ...@@ -766,57 +766,55 @@ class TestEngine(unittest.TestCase):
pred_mean = pred.mean() pred_mean = pred.mean()
self.assertGreater(pred_mean, 18) self.assertGreater(pred_mean, 18)
def check_constant_features(self, y_true, expected_pred, more_params):
    """Train a tiny model on a single constant feature and check its predictions.

    With a constant (uninformative) feature, the booster can only learn the
    label average implied by the objective; ``expected_pred`` is that value.

    Parameters
    ----------
    y_true : list of float
        Training labels; one all-ones feature row is generated per label.
    expected_pred : float or list of float
        Prediction every row is expected to receive (per-class for multiclass).
    more_params : dict
        Objective-specific overrides merged into the base training params.
    """
    labels = np.array(y_true)
    features = np.ones((labels.shape[0], 1))
    train_params = {
        'objective': 'regression',
        'num_class': 1,
        'verbose': -1,
        # tiny dataset: relax the minimum-data constraints so a split is possible
        'min_data': 1,
        'num_leaves': 2,
        'learning_rate': 1,
        'min_data_in_bin': 1,
        'boost_from_average': True
    }
    train_params.update(more_params)
    train_set = lgb.Dataset(features, labels, params=train_params)
    booster = lgb.train(train_params, train_set, num_boost_round=2)
    self.assertTrue(np.allclose(booster.predict(features), expected_pred))
def test_constant_features_regression(self):
    """Regression on a constant feature must predict the mean of the labels."""
    params = {
        'objective': 'regression'
    }
    cases = [
        ([0.0, 10.0, 0.0, 10.0], 5.0),
        ([0.0, 1.0, 2.0, 3.0], 1.5),
        ([-1.0, 1.0, -2.0, 2.0], 0.0),
    ]
    for labels, expected in cases:
        self.check_constant_features(labels, expected, params)
def test_constant_features_binary(self):
    """Binary objective on a constant feature must predict the positive-class rate."""
    params = {
        'objective': 'binary'
    }
    cases = [
        ([0.0, 10.0, 0.0, 10.0], 0.5),
        ([0.0, 1.0, 2.0, 3.0], 0.75),
    ]
    for labels, expected in cases:
        self.check_constant_features(labels, expected, params)
def test_constant_features_multiclass(self):
    """Softmax multiclass on a constant feature must predict class frequencies."""
    params = {
        'objective': 'multiclass',
        'num_class': 3
    }
    cases = [
        ([0.0, 1.0, 2.0, 0.0], [0.5, 0.25, 0.25]),
        ([0.0, 1.0, 2.0, 1.0], [0.25, 0.5, 0.25]),
    ]
    for labels, expected in cases:
        self.check_constant_features(labels, expected, params)
def test_constant_features_multiclassova(self):
    """One-vs-all multiclass on a constant feature must predict class frequencies."""
    params = {
        'objective': 'multiclassova',
        'num_class': 3
    }
    cases = [
        ([0.0, 1.0, 2.0, 0.0], [0.5, 0.25, 0.25]),
        ([0.0, 1.0, 2.0, 1.0], [0.25, 0.5, 0.25]),
    ]
    for labels, expected in cases:
        self.check_constant_features(labels, expected, params)
def test_fpreproc(self): def test_fpreproc(self):
def preprocess_data(dtrain, dtest, params): def preprocess_data(dtrain, dtest, params):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment