Unverified Commit ac706e10 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[dask][tests] reduce code duplication in Dask tests (#3828)

parent 5a4fec6d
...@@ -133,11 +133,14 @@ def test_classifier(output, centers, client, listen_port): ...@@ -133,11 +133,14 @@ def test_classifier(output, centers, client, listen_port):
centers=centers centers=centers
) )
params = {
"n_estimators": 10,
"num_leaves": 10
}
dask_classifier = dlgbm.DaskLGBMClassifier( dask_classifier = dlgbm.DaskLGBMClassifier(
time_out=5, time_out=5,
local_listen_port=listen_port, local_listen_port=listen_port,
n_estimators=10, **params
num_leaves=10
) )
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_classifier.predict(dX) p1 = dask_classifier.predict(dX)
...@@ -145,7 +148,7 @@ def test_classifier(output, centers, client, listen_port): ...@@ -145,7 +148,7 @@ def test_classifier(output, centers, client, listen_port):
s1 = accuracy_score(dy, p1) s1 = accuracy_score(dy, p1)
p1 = p1.compute() p1 = p1.compute()
local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10) local_classifier = lightgbm.LGBMClassifier(**params)
local_classifier.fit(X, y, sample_weight=w) local_classifier.fit(X, y, sample_weight=w)
p2 = local_classifier.predict(X) p2 = local_classifier.predict(X)
p2_proba = local_classifier.predict_proba(X) p2_proba = local_classifier.predict_proba(X)
...@@ -169,20 +172,20 @@ def test_classifier_pred_contrib(output, centers, client, listen_port): ...@@ -169,20 +172,20 @@ def test_classifier_pred_contrib(output, centers, client, listen_port):
centers=centers centers=centers
) )
params = {
"n_estimators": 10,
"num_leaves": 10
}
dask_classifier = dlgbm.DaskLGBMClassifier( dask_classifier = dlgbm.DaskLGBMClassifier(
time_out=5, time_out=5,
local_listen_port=listen_port, local_listen_port=listen_port,
tree_learner='data', tree_learner='data',
n_estimators=10, **params
num_leaves=10
) )
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
preds_with_contrib = dask_classifier.predict(dX, pred_contrib=True).compute() preds_with_contrib = dask_classifier.predict(dX, pred_contrib=True).compute()
local_classifier = lightgbm.LGBMClassifier( local_classifier = lightgbm.LGBMClassifier(**params)
n_estimators=10,
num_leaves=10
)
local_classifier.fit(X, y, sample_weight=w) local_classifier.fit(X, y, sample_weight=w)
local_preds_with_contrib = local_classifier.predict(X, pred_contrib=True) local_preds_with_contrib = local_classifier.predict(X, pred_contrib=True)
...@@ -243,16 +246,19 @@ def test_classifier_local_predict(client, listen_port): ...@@ -243,16 +246,19 @@ def test_classifier_local_predict(client, listen_port):
output='array' output='array'
) )
params = {
"n_estimators": 10,
"num_leaves": 10
}
dask_classifier = dlgbm.DaskLGBMClassifier( dask_classifier = dlgbm.DaskLGBMClassifier(
time_out=5, time_out=5,
local_port=listen_port, local_port=listen_port,
n_estimators=10, **params
num_leaves=10
) )
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_classifier.to_local().predict(dX) p1 = dask_classifier.to_local().predict(dX)
local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10) local_classifier = lightgbm.LGBMClassifier(**params)
local_classifier.fit(X, y, sample_weight=w) local_classifier.fit(X, y, sample_weight=w)
p2 = local_classifier.predict(X) p2 = local_classifier.predict(X)
...@@ -270,12 +276,15 @@ def test_regressor(output, client, listen_port): ...@@ -270,12 +276,15 @@ def test_regressor(output, client, listen_port):
output=output output=output
) )
params = {
"random_state": 42,
"num_leaves": 10
}
dask_regressor = dlgbm.DaskLGBMRegressor( dask_regressor = dlgbm.DaskLGBMRegressor(
time_out=5, time_out=5,
local_listen_port=listen_port, local_listen_port=listen_port,
seed=42, tree='data',
num_leaves=10, **params
tree='data'
) )
dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw) dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
p1 = dask_regressor.predict(dX) p1 = dask_regressor.predict(dX)
...@@ -283,7 +292,7 @@ def test_regressor(output, client, listen_port): ...@@ -283,7 +292,7 @@ def test_regressor(output, client, listen_port):
s1 = r2_score(dy, p1) s1 = r2_score(dy, p1)
p1 = p1.compute() p1 = p1.compute()
local_regressor = lightgbm.LGBMRegressor(seed=42, num_leaves=10) local_regressor = lightgbm.LGBMRegressor(**params)
local_regressor.fit(X, y, sample_weight=w) local_regressor.fit(X, y, sample_weight=w)
s2 = local_regressor.score(X, y) s2 = local_regressor.score(X, y)
p2 = local_regressor.predict(X) p2 = local_regressor.predict(X)
...@@ -306,20 +315,20 @@ def test_regressor_pred_contrib(output, client, listen_port): ...@@ -306,20 +315,20 @@ def test_regressor_pred_contrib(output, client, listen_port):
output=output output=output
) )
params = {
"n_estimators": 10,
"num_leaves": 10
}
dask_regressor = dlgbm.DaskLGBMRegressor( dask_regressor = dlgbm.DaskLGBMRegressor(
time_out=5, time_out=5,
local_listen_port=listen_port, local_listen_port=listen_port,
tree_learner='data', tree_learner='data',
n_estimators=10, **params
num_leaves=10
) )
dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client) dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute() preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute()
local_regressor = lightgbm.LGBMRegressor( local_regressor = lightgbm.LGBMRegressor(**params)
n_estimators=10,
num_leaves=10
)
local_regressor.fit(X, y, sample_weight=w) local_regressor.fit(X, y, sample_weight=w)
local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True) local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True)
...@@ -341,26 +350,23 @@ def test_regressor_quantile(output, client, listen_port, alpha): ...@@ -341,26 +350,23 @@ def test_regressor_quantile(output, client, listen_port, alpha):
output=output output=output
) )
params = {
"objective": "quantile",
"alpha": alpha,
"random_state": 42,
"n_estimators": 10,
"num_leaves": 10
}
dask_regressor = dlgbm.DaskLGBMRegressor( dask_regressor = dlgbm.DaskLGBMRegressor(
local_listen_port=listen_port, local_listen_port=listen_port,
seed=42, tree_learner_type='data_parallel',
objective='quantile', **params
alpha=alpha,
n_estimators=10,
num_leaves=10,
tree_learner_type='data_parallel'
) )
dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw) dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
p1 = dask_regressor.predict(dX).compute() p1 = dask_regressor.predict(dX).compute()
q1 = np.count_nonzero(y < p1) / y.shape[0] q1 = np.count_nonzero(y < p1) / y.shape[0]
local_regressor = lightgbm.LGBMRegressor( local_regressor = lightgbm.LGBMRegressor(**params)
seed=42,
objective='quantile',
alpha=alpha,
n_estimatores=10,
num_leaves=10
)
local_regressor.fit(X, y, sample_weight=w) local_regressor.fit(X, y, sample_weight=w)
p2 = local_regressor.predict(X) p2 = local_regressor.predict(X)
q2 = np.count_nonzero(y < p2) / y.shape[0] q2 = np.count_nonzero(y < p2) / y.shape[0]
...@@ -377,7 +383,7 @@ def test_regressor_local_predict(client, listen_port): ...@@ -377,7 +383,7 @@ def test_regressor_local_predict(client, listen_port):
dask_regressor = dlgbm.DaskLGBMRegressor( dask_regressor = dlgbm.DaskLGBMRegressor(
local_listen_port=listen_port, local_listen_port=listen_port,
seed=42, random_state=42,
n_estimators=10, n_estimators=10,
num_leaves=10, num_leaves=10,
tree_type='data' tree_type='data'
...@@ -407,25 +413,23 @@ def test_ranker(output, client, listen_port, group): ...@@ -407,25 +413,23 @@ def test_ranker(output, client, listen_port, group):
# use many trees + leaves to overfit, help ensure that dask data-parallel strategy matches that of # use many trees + leaves to overfit, help ensure that dask data-parallel strategy matches that of
# serial learner. See https://github.com/microsoft/LightGBM/issues/3292#issuecomment-671288210. # serial learner. See https://github.com/microsoft/LightGBM/issues/3292#issuecomment-671288210.
params = {
"random_state": 42,
"n_estimators": 50,
"num_leaves": 20,
"min_child_samples": 1
}
dask_ranker = dlgbm.DaskLGBMRanker( dask_ranker = dlgbm.DaskLGBMRanker(
time_out=5, time_out=5,
local_listen_port=listen_port, local_listen_port=listen_port,
tree_learner_type='data_parallel', tree_learner_type='data_parallel',
n_estimators=50, **params
num_leaves=20,
seed=42,
min_child_samples=1
) )
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client) dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client)
rnkvec_dask = dask_ranker.predict(dX) rnkvec_dask = dask_ranker.predict(dX)
rnkvec_dask = rnkvec_dask.compute() rnkvec_dask = rnkvec_dask.compute()
local_ranker = lightgbm.LGBMRanker( local_ranker = lightgbm.LGBMRanker(**params)
n_estimators=50,
num_leaves=20,
seed=42,
min_child_samples=1
)
local_ranker.fit(X, y, sample_weight=w, group=g) local_ranker.fit(X, y, sample_weight=w, group=g)
rnkvec_local = local_ranker.predict(X) rnkvec_local = local_ranker.predict(X)
...@@ -453,7 +457,7 @@ def test_ranker_local_predict(output, client, listen_port, group): ...@@ -453,7 +457,7 @@ def test_ranker_local_predict(output, client, listen_port, group):
tree_learner='data', tree_learner='data',
n_estimators=10, n_estimators=10,
num_leaves=10, num_leaves=10,
seed=42, random_state=42,
min_child_samples=1 min_child_samples=1
) )
dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client) dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment