"src/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "39e473239fee5f395caf452543a95961e6722713"
Unverified Commit 82e2ff7a authored by shiyu1994, committed by GitHub
Browse files

[Python] / [R] add start_iteration to python predict interface (fix #3058) (#3272)



* [python] add start_iteration to python predict interface (#3058)

* Apply suggestions from code review

* Update lightgbm_R.h

* Apply suggestions from code review

* Apply suggestions from code review

* fix R interface

* update R documentation
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
parent 083b02af
...@@ -263,6 +263,7 @@ def test_booster(): ...@@ -263,6 +263,7 @@ def test_booster():
mat.shape[1], mat.shape[1],
1, 1,
1, 1,
0,
25, 25,
c_str(''), c_str(''),
ctypes.byref(num_preb), ctypes.byref(num_preb),
...@@ -273,6 +274,17 @@ def test_booster(): ...@@ -273,6 +274,17 @@ def test_booster():
'../../examples/binary_classification/binary.test')), '../../examples/binary_classification/binary.test')),
0, 0,
0, 0,
0,
25,
c_str(''),
c_str('preb.txt'))
LIB.LGBM_BoosterPredictForFile(
booster2,
c_str(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/binary_classification/binary.test')),
0,
0,
10,
25, 25,
c_str(''), c_str(''),
c_str('preb.txt')) c_str('preb.txt'))
......
...@@ -2315,3 +2315,90 @@ class TestEngine(unittest.TestCase): ...@@ -2315,3 +2315,90 @@ class TestEngine(unittest.TestCase):
est = lgb.train(dict(params, interaction_constraints=[[0] + list(range(2, num_features)), est = lgb.train(dict(params, interaction_constraints=[[0] + list(range(2, num_features)),
[1] + list(range(2, num_features))]), [1] + list(range(2, num_features))]),
train_data, num_boost_round=10) train_data, num_boost_round=10)
def test_predict_with_start_iteration(self):
    """Exercise ``Booster.predict`` with the ``start_iteration`` argument.

    For a regression, a multi-class and a binary task, and for each of
    ``early_stopping_rounds`` in (1, 10, None), a booster is trained and the
    checks in ``inner_test`` are run against it.
    """
    def inner_test(X, y, params, early_stopping_rounds):
        """Train a booster on a 90/10 split of (X, y) and check predictions.

        Parameters
        ----------
        X, y : features and labels as returned by the sklearn loaders.
        params : dict of training parameters passed to ``lgb.train``.
        early_stopping_rounds : int or None, forwarded to ``lgb.train``.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_test, label=y_test)
        booster = lgb.train(params, train_data, num_boost_round=100,
                            early_stopping_rounds=early_stopping_rounds,
                            valid_sets=[valid_data])

        # Predicting once with the default iteration range must equal the sum of
        # predictions over consecutive windows selected with start_iteration and
        # num_iteration. Raw scores and feature contributions are checked.
        all_pred = booster.predict(X, raw_score=True)
        all_pred_contrib = booster.predict(X, pred_contrib=True)
        for step in (10, 12):
            pred = np.zeros_like(all_pred)
            pred_contrib = np.zeros_like(all_pred_contrib)
            for start_iter in range(0, 100, step):
                pred += booster.predict(X, num_iteration=step, start_iteration=start_iter, raw_score=True)
                pred_contrib += booster.predict(X, num_iteration=step, start_iteration=start_iter, pred_contrib=True)
            np.testing.assert_allclose(all_pred, pred)
            np.testing.assert_allclose(all_pred_contrib, pred_contrib)

        # start_iteration <= 0 with num_iteration left as None is expected to
        # match an explicit num_iteration=booster.best_iteration.
        pred1 = booster.predict(X, start_iteration=-1)
        pred2 = booster.predict(X, num_iteration=booster.best_iteration)
        pred3 = booster.predict(X, num_iteration=booster.best_iteration, start_iteration=0)
        np.testing.assert_allclose(pred1, pred2)
        np.testing.assert_allclose(pred1, pred3)

        # start_iteration > 0 with num_iteration <= 0 is expected to match an
        # explicit num_iteration=90, for plain predictions as well as with
        # pred_leaf=True and pred_contrib=True.
        for extra_kwargs in ({}, {'pred_leaf': True}, {'pred_contrib': True}):
            pred4 = booster.predict(X, start_iteration=10, num_iteration=-1, **extra_kwargs)
            pred5 = booster.predict(X, start_iteration=10, num_iteration=90, **extra_kwargs)
            pred6 = booster.predict(X, start_iteration=10, num_iteration=0, **extra_kwargs)
            np.testing.assert_allclose(pred4, pred5)
            np.testing.assert_allclose(pred4, pred6)

    # Each run is repeated with and without early stopping.
    early_stopping_options = (1, 10, None)

    # test for regression
    X, y = load_boston(return_X_y=True)
    params = {
        'objective': 'regression',
        'verbose': -1,
        'metric': 'l2',
        'learning_rate': 0.5
    }
    for early_stopping_rounds in early_stopping_options:
        inner_test(X, y, params, early_stopping_rounds=early_stopping_rounds)

    # test for multi-class
    X, y = load_iris(return_X_y=True)
    # A dict literal keeps only the last value of a repeated key, so the metric
    # that was actually in effect ('multi_error') is the single one listed.
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'verbose': -1,
        'metric': 'multi_error'
    }
    for early_stopping_rounds in early_stopping_options:
        inner_test(X, y, params, early_stopping_rounds=early_stopping_rounds)

    # test for binary
    X, y = load_breast_cancer(return_X_y=True)
    # Same as above: 'auc' was the effective metric of the duplicated key.
    params = {
        'objective': 'binary',
        'verbose': -1,
        'metric': 'auc'
    }
    for early_stopping_rounds in early_stopping_options:
        inner_test(X, y, params, early_stopping_rounds=early_stopping_rounds)
...@@ -607,6 +607,41 @@ class TestSklearn(unittest.TestCase): ...@@ -607,6 +607,41 @@ class TestSklearn(unittest.TestCase):
np.testing.assert_allclose, np.testing.assert_allclose,
res_engine, res_sklearn_params) res_engine, res_sklearn_params)
# Tests start_iteration
# Tests same probabilities, starting from iteration 10
res_engine = gbm.predict(X_test, start_iteration=10)
res_sklearn = clf.predict_proba(X_test, start_iteration=10)
np.testing.assert_allclose(res_engine, res_sklearn)
# Tests same predictions, starting from iteration 10
res_engine = np.argmax(gbm.predict(X_test, start_iteration=10), axis=1)
res_sklearn = clf.predict(X_test, start_iteration=10)
np.testing.assert_equal(res_engine, res_sklearn)
# Tests same raw scores, starting from iteration 10
res_engine = gbm.predict(X_test, raw_score=True, start_iteration=10)
res_sklearn = clf.predict(X_test, raw_score=True, start_iteration=10)
np.testing.assert_allclose(res_engine, res_sklearn)
# Tests same leaf indices, starting from iteration 10
res_engine = gbm.predict(X_test, pred_leaf=True, start_iteration=10)
res_sklearn = clf.predict(X_test, pred_leaf=True, start_iteration=10)
np.testing.assert_equal(res_engine, res_sklearn)
# Tests same feature contributions, starting from iteration 10
res_engine = gbm.predict(X_test, pred_contrib=True, start_iteration=10)
res_sklearn = clf.predict(X_test, pred_contrib=True, start_iteration=10)
np.testing.assert_allclose(res_engine, res_sklearn)
# Tests other parameters for the prediction works, starting from iteration 10
res_engine = gbm.predict(X_test, start_iteration=10)
res_sklearn_params = clf.predict_proba(X_test,
pred_early_stop=True,
pred_early_stop_margin=1.0, start_iteration=10)
self.assertRaises(AssertionError,
np.testing.assert_allclose,
res_engine, res_sklearn_params)
def test_evaluate_train_set(self): def test_evaluate_train_set(self):
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment