Unverified commit 9eeac3c7 authored by Nikita Titov, committed by GitHub

[python][tests] minor Python tests cleanup (#3860)

* Update test_engine.py

* Update test_sklearn.py

* Update test_engine.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_sklearn.py

* Update test_engine.py

* Update .vsts-ci.yml

* Update .vsts-ci.yml

* Update test_engine.py

* Update test_dual.py

* Update test_engine.py

* Update .vsts-ci.yml

* Update .vsts-ci.yml
parent 5312b955
@@ -31,5 +31,5 @@ def test_cpu_and_gpu_work():
     gpu_bst = lgb.train(params_gpu, data, num_boost_round=10)
     gpu_score = log_loss(y, gpu_bst.predict(X))
-    np.testing.assert_allclose(cpu_score, gpu_score)
+    assert cpu_score == pytest.approx(gpu_score)
     assert gpu_score < 0.242
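A side note on this change: np.testing.assert_allclose and pytest.approx have different default tolerances, and the pytest style reads as a plain assert. A minimal sketch with made-up scores (cpu_score and gpu_score here are placeholders, not values produced by the test):

    import numpy as np
    import pytest

    cpu_score = 0.24          # hypothetical CPU log-loss
    gpu_score = 0.24 + 1e-9   # hypothetical GPU log-loss, nearly identical
    # np.testing.assert_allclose raises AssertionError on mismatch (defaults: rtol=1e-7, atol=0)
    np.testing.assert_allclose(cpu_score, gpu_score)
    # pytest.approx reads as a plain assert; its default relative tolerance is 1e-6
    assert cpu_score == pytest.approx(gpu_score)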
@@ -950,7 +950,7 @@ def test_pandas_categorical():
     with pytest.raises(AssertionError):
         np.testing.assert_allclose(pred0, pred7)  # ordered cat features aren't treated as cat features by default
     with pytest.raises(AssertionError):
-        np.testing.assert_allclose(pred0, pred8)  # ordered cat features aren't treated as cat features by default
+        np.testing.assert_allclose(pred0, pred8)
     assert gbm0.pandas_categorical == cat_values
     assert gbm1.pandas_categorical == cat_values
     assert gbm2.pandas_categorical == cat_values
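The unchanged lines in this hunk use a pattern worth a note: wrapping np.testing.assert_allclose in pytest.raises(AssertionError) asserts that two prediction arrays are *not* all-close. A minimal sketch with toy arrays (pred_a and pred_b stand in for pred0/pred8):

    import numpy as np
    import pytest

    pred_a = np.array([0.1, 0.2, 0.3])   # toy stand-in for pred0
    pred_b = np.array([0.1, 0.25, 0.3])  # toy stand-in for pred8, deliberately different
    # assert_allclose raises AssertionError because the arrays differ,
    # and pytest.raises turns that into a passing check that they are not equal
    with pytest.raises(AssertionError):
        np.testing.assert_allclose(pred_a, pred_b)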
@@ -2500,7 +2500,7 @@ def test_linear_trees(tmp_path):
     est = lgb.train(dict(params, linear_tree=True), lgb_train, num_boost_round=10, evals_result=res,
                     valid_sets=[lgb_train], valid_names=['train'])
     pred2 = est.predict(x)
-    np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred2), atol=10**(-1))
+    assert res['train']['l2'][-1] == pytest.approx(mean_squared_error(y, pred2), abs=1e-1)
     assert mean_squared_error(y, pred2) < mean_squared_error(y, pred1)
     # test again with nans in data
     x[:10] = np.nan
@@ -2512,14 +2512,14 @@ def test_linear_trees(tmp_path):
     est = lgb.train(dict(params, linear_tree=True), lgb_train, num_boost_round=10, evals_result=res,
                     valid_sets=[lgb_train], valid_names=['train'])
     pred2 = est.predict(x)
-    np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred2), atol=10**(-1))
+    assert res['train']['l2'][-1] == pytest.approx(mean_squared_error(y, pred2), abs=1e-1)
     assert mean_squared_error(y, pred2) < mean_squared_error(y, pred1)
     # test again with bagging
     res = {}
     est = lgb.train(dict(params, linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train,
                     num_boost_round=10, evals_result=res, valid_sets=[lgb_train], valid_names=['train'])
     pred = est.predict(x)
-    np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred), atol=10**(-1))
+    assert res['train']['l2'][-1] == pytest.approx(mean_squared_error(y, pred), abs=1e-1)
     # test with a feature that has only one non-nan value
     x = np.concatenate([np.ones([x.shape[0], 1]), x], 1)
     x[500:, 1] = np.nan
@@ -2529,7 +2529,7 @@ def test_linear_trees(tmp_path):
     est = lgb.train(dict(params, linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train,
                     num_boost_round=10, evals_result=res, valid_sets=[lgb_train], valid_names=['train'])
     pred = est.predict(x)
-    np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred), atol=10**(-1))
+    assert res['train']['l2'][-1] == pytest.approx(mean_squared_error(y, pred), abs=1e-1)
     # test with a categorical feature
     x[:250, 0] = 0
     y[:250] += 10
...
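For the linear-tree hunks above, abs=1e-1 plays the role of the old atol=10**(-1): pytest.approx accepts any value within that absolute tolerance (or within its default relative tolerance). A hedged sketch with made-up numbers standing in for res['train']['l2'][-1] and the recomputed MSE:

    import pytest

    last_eval_l2 = 0.52      # hypothetical last recorded training l2
    recomputed_mse = 0.49    # hypothetical MSE recomputed from the predictions
    # abs=1e-1 mirrors atol=10**(-1): the two values must agree to within 0.1
    assert last_eval_l2 == pytest.approx(recomputed_mse, abs=1e-1)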
@@ -86,7 +86,7 @@ def test_binary():
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
     ret = log_loss(y_test, gbm.predict_proba(X_test))
     assert ret < 0.12
-    assert ret == pytest.approx(gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], abs=1e-5)
+    assert gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret)
 
 
 def test_regression():
@@ -96,7 +96,7 @@ def test_regression():
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 7
-    assert ret == pytest.approx(gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], abs=1e-5)
+    assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
 
 
 def test_multiclass():
@@ -108,7 +108,7 @@ def test_multiclass():
     assert ret < 0.05
     ret = multi_logloss(y_test, gbm.predict_proba(X_test))
     assert ret < 0.16
-    assert ret == pytest.approx(gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1], abs=1e-5)
+    assert gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret)
 
 
 def test_lambdarank():
@@ -152,7 +152,7 @@ def test_regression_with_custom_objective():
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 7.0
-    assert ret == pytest.approx(gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], abs=1e-5)
+    assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
 
 
 def test_binary_classification_with_custom_objective():
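In the four sklearn hunks above, dropping abs=1e-5 means pytest.approx falls back to its default relative tolerance of 1e-6 (relative to the wrapped value), and the recorded eval result is now compared against the recomputed metric rather than the other way around. A sketch with hypothetical values in place of ret and the recorded logloss/l2:

    import pytest

    ret = 0.1150000       # hypothetical metric recomputed from the predictions
    recorded = 0.1150001  # hypothetical value stored in gbm.evals_result_
    # without abs=, pytest.approx uses a relative tolerance of 1e-6,
    # which is tighter than the old fixed abs=1e-5 for metrics of this magnitude
    assert recorded == pytest.approx(ret)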
@@ -199,10 +199,8 @@ def test_stacking_classifier():
     assert clf.named_estimators_['gbm1'].n_features_in_ == clf.named_estimators_['gbm2'].n_features_in_
     assert clf.final_estimator_.n_features_in_ == 10  # number of concatenated features
     assert len(clf.final_estimator_.feature_importances_) == 10
-    classes = clf.named_estimators_['gbm1'].classes_ == clf.named_estimators_['gbm2'].classes_
-    assert all(classes)
-    classes = clf.classes_ == clf.named_estimators_['gbm1'].classes_
-    assert all(classes)
+    assert all(clf.named_estimators_['gbm1'].classes_ == clf.named_estimators_['gbm2'].classes_)
+    assert all(clf.classes_ == clf.named_estimators_['gbm1'].classes_)
 
 
 # sklearn <0.23 does not have a stacking regressor and n_features_in_ property
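The consolidated assertions above rely on '==' between numpy arrays being elementwise; all(...) then checks that every class label matches, so the intermediate 'classes' variable is unnecessary. A toy sketch (the arrays are placeholders for the classifiers' classes_ attributes):

    import numpy as np

    classes_gbm1 = np.array([0, 1])  # placeholder for clf.named_estimators_['gbm1'].classes_
    classes_gbm2 = np.array([0, 1])  # placeholder for clf.named_estimators_['gbm2'].classes_
    # elementwise comparison yields a boolean array; all() requires every entry to be True
    assert all(classes_gbm1 == classes_gbm2)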
@@ -995,6 +993,7 @@ def test_first_metric_only():
         expected = assumed_iteration + (params_fit['early_stopping_rounds']
                                         if eval_set_name != 'training'
                                         and assumed_iteration != gbm.n_estimators else 0)
+        assert expected == actual
         if eval_set_name != 'training':
             assert assumed_iteration == gbm.best_iteration_
         else:
...