Unverified Commit 02e4b791 authored by Shrill Shrestha's avatar Shrill Shrestha Committed by GitHub
Browse files

[dask] merge local_predict tests into other tests (fixes #3833) (#3842)



* Merge test_<est>_local_predict and test_<est> tests for Dask module #3833

* Merge test_<est>_local_predict to test_<est> tests in dask module - refactor #3833

* Update test_classifier and rename variables resolves #3833

* rename variables resolves #3833

* manage precision error #3833
Co-authored-by: default avatarJames Lamb <jaylamb20@gmail.com>
parent 340229a5
...@@ -154,6 +154,7 @@ def test_classifier(output, centers, client, listen_port): ...@@ -154,6 +154,7 @@ def test_classifier(output, centers, client, listen_port):
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_classifier.predict(dX) p1 = dask_classifier.predict(dX)
p1_proba = dask_classifier.predict_proba(dX).compute() p1_proba = dask_classifier.predict_proba(dX).compute()
p1_local = dask_classifier.to_local().predict(X)
s1 = _accuracy_score(dy, p1) s1 = _accuracy_score(dy, p1)
p1 = p1.compute() p1 = p1.compute()
...@@ -168,6 +169,8 @@ def test_classifier(output, centers, client, listen_port): ...@@ -168,6 +169,8 @@ def test_classifier(output, centers, client, listen_port):
assert_eq(y, p1) assert_eq(y, p1)
assert_eq(y, p2) assert_eq(y, p2)
assert_eq(p1_proba, p2_proba, atol=0.3) assert_eq(p1_proba, p2_proba, atol=0.3)
assert_eq(p1_local, p2)
assert_eq(y, p1_local)
client.close() client.close()
...@@ -249,35 +252,6 @@ def test_training_does_not_fail_on_port_conflicts(client): ...@@ -249,35 +252,6 @@ def test_training_does_not_fail_on_port_conflicts(client):
client.close() client.close()
def test_classifier_local_predict(client, listen_port):
    """Check that a Dask classifier converted with to_local() predicts like a locally-trained one."""
    X, y, w, dX, dy, dw = _create_data(
        objective='classification',
        output='array'
    )

    params = {
        "n_estimators": 10,
        "num_leaves": 10
    }

    dask_classifier = lgb.DaskLGBMClassifier(
        time_out=5,
        # fixed: the keyword is 'local_listen_port' (as in the other Dask tests);
        # 'local_port' would not be recognized as the listen-port setting
        local_listen_port=listen_port,
        **params
    )
    dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
    # fixed: to_local() returns a plain (non-Dask) model, so predict on the
    # in-memory X, consistent with the other to_local() checks in this file
    p1 = dask_classifier.to_local().predict(X)

    local_classifier = lgb.LGBMClassifier(**params)
    local_classifier.fit(X, y, sample_weight=w)
    p2 = local_classifier.predict(X)

    # to-local predictions should match the locally-trained model and the labels
    assert_eq(p1, p2)
    assert_eq(y, p1)
    assert_eq(y, p2)

    client.close()
@pytest.mark.parametrize('output', data_output) @pytest.mark.parametrize('output', data_output)
def test_regressor(output, client, listen_port): def test_regressor(output, client, listen_port):
X, y, w, dX, dy, dw = _create_data( X, y, w, dX, dy, dw = _create_data(
...@@ -300,6 +274,8 @@ def test_regressor(output, client, listen_port): ...@@ -300,6 +274,8 @@ def test_regressor(output, client, listen_port):
if output != 'dataframe': if output != 'dataframe':
s1 = _r2_score(dy, p1) s1 = _r2_score(dy, p1)
p1 = p1.compute() p1 = p1.compute()
p1_local = dask_regressor.to_local().predict(X)
s1_local = dask_regressor.to_local().score(X, y)
local_regressor = lgb.LGBMRegressor(**params) local_regressor = lgb.LGBMRegressor(**params)
local_regressor.fit(X, y, sample_weight=w) local_regressor.fit(X, y, sample_weight=w)
...@@ -309,10 +285,12 @@ def test_regressor(output, client, listen_port): ...@@ -309,10 +285,12 @@ def test_regressor(output, client, listen_port):
# Scores should be the same # Scores should be the same
if output != 'dataframe': if output != 'dataframe':
assert_eq(s1, s2, atol=.01) assert_eq(s1, s2, atol=.01)
assert_eq(s1, s1_local, atol=.003)
# Predictions should be roughly the same # Predictions should be roughly the same
assert_eq(y, p1, rtol=1., atol=100.) assert_eq(y, p1, rtol=1., atol=100.)
assert_eq(y, p2, rtol=1., atol=50.) assert_eq(y, p2, rtol=1., atol=50.)
assert_eq(p1, p1_local)
client.close() client.close()
...@@ -387,30 +365,6 @@ def test_regressor_quantile(output, client, listen_port, alpha): ...@@ -387,30 +365,6 @@ def test_regressor_quantile(output, client, listen_port, alpha):
client.close() client.close()
def test_regressor_local_predict(client, listen_port):
    """Check that distributed predictions and scores match those of the to_local() model."""
    X, y, _, dX, dy, dw = _create_data('regression', output='array')

    dask_regressor = lgb.DaskLGBMRegressor(
        local_listen_port=listen_port,
        random_state=42,
        n_estimators=10,
        num_leaves=10,
        # fixed: the LightGBM parameter is 'tree_learner' (as used in
        # test_ranker_local_predict); 'tree_type' is not that parameter
        tree_learner='data'
    )
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
    p1 = dask_regressor.predict(dX)
    p2 = dask_regressor.to_local().predict(X)
    s1 = _r2_score(dy, p1)
    p1 = p1.compute()
    s2 = dask_regressor.to_local().score(X, y)

    # Predictions and scores should be the same
    assert_eq(p1, p2)
    assert_eq(s1, s2)

    client.close()
@pytest.mark.parametrize('output', ['array', 'dataframe']) @pytest.mark.parametrize('output', ['array', 'dataframe'])
@pytest.mark.parametrize('group', [None, group_sizes]) @pytest.mark.parametrize('group', [None, group_sizes])
def test_ranker(output, client, listen_port, group): def test_ranker(output, client, listen_port, group):
...@@ -437,6 +391,7 @@ def test_ranker(output, client, listen_port, group): ...@@ -437,6 +391,7 @@ def test_ranker(output, client, listen_port, group):
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client) dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client)
rnkvec_dask = dask_ranker.predict(dX) rnkvec_dask = dask_ranker.predict(dX)
rnkvec_dask = rnkvec_dask.compute() rnkvec_dask = rnkvec_dask.compute()
rnkvec_dask_local = dask_ranker.to_local().predict(X)
local_ranker = lgb.LGBMRanker(**params) local_ranker = lgb.LGBMRanker(**params)
local_ranker.fit(X, y, sample_weight=w, group=g) local_ranker.fit(X, y, sample_weight=w, group=g)
...@@ -447,35 +402,7 @@ def test_ranker(output, client, listen_port, group): ...@@ -447,35 +402,7 @@ def test_ranker(output, client, listen_port, group):
dcor = spearmanr(rnkvec_dask, y).correlation dcor = spearmanr(rnkvec_dask, y).correlation
assert dcor > 0.6 assert dcor > 0.6
assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.75 assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.75
assert_eq(rnkvec_dask, rnkvec_dask_local)
client.close()
@pytest.mark.parametrize('output', ['array', 'dataframe'])
@pytest.mark.parametrize('group', [None, group_sizes])
def test_ranker_local_predict(output, client, listen_port, group):
    """Train a Dask ranker and verify its distributed predictions agree with to_local() predictions."""
    X, y, w, g, dX, dy, dw, dg = _create_ranking_data(output=output, group=group)

    ranker_params = {
        'time_out': 5,
        'local_listen_port': listen_port,
        'tree_learner': 'data',
        'n_estimators': 10,
        'num_leaves': 10,
        'random_state': 42,
        'min_child_samples': 1
    }
    dask_ranker = lgb.DaskLGBMRanker(**ranker_params).fit(dX, dy, group=dg, client=client)

    preds_distributed = dask_ranker.predict(dX).compute()
    preds_local = dask_ranker.to_local().predict(X)

    # distributed and to-local scores should be the same.
    assert_eq(preds_distributed, preds_local)
client.close() client.close()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment