tianlh / LightGBM-DCU · Commits · ac706e10

Commit ac706e10 (unverified), authored Jan 24, 2021 by Nikita Titov, committed via GitHub on Jan 24, 2021

[dask][tests] reduce code duplication in Dask tests (#3828)
parent 5a4fec6d

Showing 1 changed file with 51 additions and 47 deletions (+51 −47).
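Every hunk below applies the same refactor: hyperparameters that each test previously spelled out twice, once for the Dask estimator and once for the local reference estimator, are collected into a single params dict and unpacked with **params at both call sites. A minimal, purely local sketch of the pattern, assuming only lightgbm and scikit-learn are installed (the data and names here are illustrative, not taken from the test file):

    import lightgbm
    from sklearn.datasets import make_blobs

    X, y = make_blobs(n_samples=200, centers=2, random_state=42)

    # One dict drives every estimator under test, so settings cannot drift apart.
    params = {
        "n_estimators": 10,
        "num_leaves": 10
    }

    # The same dict is unpacked at each call site; extra, site-specific
    # keyword arguments still compose with it.
    clf = lightgbm.LGBMClassifier(**params)
    clf.fit(X, y)
    print(clf.predict(X)[:5])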
tests/python_package_test/test_dask.py
@@ -133,11 +133,14 @@ def test_classifier(output, centers, client, listen_port):
         centers=centers
     )
+    params = {
+        "n_estimators": 10,
+        "num_leaves": 10
+    }
 
     dask_classifier = dlgbm.DaskLGBMClassifier(
         time_out=5,
         local_listen_port=listen_port,
-        n_estimators=10,
-        num_leaves=10
+        **params
     )
     dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
     p1 = dask_classifier.predict(dX)
@@ -145,7 +148,7 @@ def test_classifier(output, centers, client, listen_port):
     s1 = accuracy_score(dy, p1)
     p1 = p1.compute()
 
-    local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10)
+    local_classifier = lightgbm.LGBMClassifier(**params)
     local_classifier.fit(X, y, sample_weight=w)
     p2 = local_classifier.predict(X)
     p2_proba = local_classifier.predict_proba(X)
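The test goes on to check that distributed and local training agree. A hedged, purely local sketch of the same comparison on synthetic data; accuracy_score and predict_proba behave as in scikit-learn, with predict_proba returning an array of shape (n_samples, n_classes):

    import lightgbm
    from sklearn.datasets import make_blobs
    from sklearn.metrics import accuracy_score

    X, y = make_blobs(n_samples=500, centers=2, random_state=42)
    params = {"n_estimators": 10, "num_leaves": 10}

    clf = lightgbm.LGBMClassifier(**params).fit(X, y)
    p = clf.predict(X)
    proba = clf.predict_proba(X)

    print(accuracy_score(y, p))   # training accuracy
    print(proba.shape)            # (500, 2) for binary classification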
@@ -169,20 +172,20 @@ def test_classifier_pred_contrib(output, centers, client, listen_port):
         centers=centers
     )
+    params = {
+        "n_estimators": 10,
+        "num_leaves": 10
+    }
 
     dask_classifier = dlgbm.DaskLGBMClassifier(
         time_out=5,
         local_listen_port=listen_port,
         tree_learner='data',
-        n_estimators=10,
-        num_leaves=10
+        **params
     )
     dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
     preds_with_contrib = dask_classifier.predict(dX, pred_contrib=True).compute()
 
-    local_classifier = lightgbm.LGBMClassifier(
-        n_estimators=10,
-        num_leaves=10
-    )
+    local_classifier = lightgbm.LGBMClassifier(**params)
     local_classifier.fit(X, y, sample_weight=w)
     local_preds_with_contrib = local_classifier.predict(X, pred_contrib=True)
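In both branches of this hunk, predict(..., pred_contrib=True) asks LightGBM for per-feature (SHAP-style) contributions instead of plain predictions; for a regressor or binary classifier the result has one column per feature plus a final bias column, and the rows sum to the raw model score. A small local sketch of that property (synthetic data only):

    import lightgbm
    from sklearn.datasets import make_blobs

    X, y = make_blobs(n_samples=200, centers=2, n_features=4, random_state=42)
    params = {"n_estimators": 10, "num_leaves": 10}

    clf = lightgbm.LGBMClassifier(**params).fit(X, y)
    contrib = clf.predict(X, pred_contrib=True)

    # One contribution per feature plus one bias column.
    assert contrib.shape == (X.shape[0], X.shape[1] + 1)

    # Row sums are the raw (pre-sigmoid) scores for each sample.
    print(contrib.sum(axis=1)[:3])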
@@ -243,16 +246,19 @@ def test_classifier_local_predict(client, listen_port):
         output='array'
     )
+    params = {
+        "n_estimators": 10,
+        "num_leaves": 10
+    }
 
     dask_classifier = dlgbm.DaskLGBMClassifier(
         time_out=5,
         local_port=listen_port,
-        n_estimators=10,
-        num_leaves=10
+        **params
     )
     dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
     p1 = dask_classifier.to_local().predict(dX)
 
-    local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10)
+    local_classifier = lightgbm.LGBMClassifier(**params)
     local_classifier.fit(X, y, sample_weight=w)
     p2 = local_classifier.predict(X)
@@ -270,12 +276,15 @@ def test_regressor(output, client, listen_port):
         output=output
     )
+    params = {
+        "random_state": 42,
+        "num_leaves": 10
+    }
 
     dask_regressor = dlgbm.DaskLGBMRegressor(
         time_out=5,
         local_listen_port=listen_port,
-        seed=42,
-        num_leaves=10,
-        tree='data'
+        tree='data',
+        **params
     )
     dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
     p1 = dask_regressor.predict(dX)
@@ -283,7 +292,7 @@ def test_regressor(output, client, listen_port):
     s1 = r2_score(dy, p1)
     p1 = p1.compute()
 
-    local_regressor = lightgbm.LGBMRegressor(seed=42, num_leaves=10)
+    local_regressor = lightgbm.LGBMRegressor(**params)
     local_regressor.fit(X, y, sample_weight=w)
     s2 = local_regressor.score(X, y)
     p2 = local_regressor.predict(X)
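The local branch uses score(X, y), which for a scikit-learn regressor is defined as the R² of predict(X) against y, so s2 is directly comparable with the r2_score-based s1 computed in the Dask branch. A quick local check of that equivalence (illustrative data only):

    import numpy as np
    import lightgbm
    from sklearn.datasets import make_regression
    from sklearn.metrics import r2_score

    X, y = make_regression(n_samples=300, n_features=5, random_state=42)
    params = {"random_state": 42, "num_leaves": 10}

    reg = lightgbm.LGBMRegressor(**params).fit(X, y)

    # score() is R² by definition, so the two numbers coincide.
    assert np.isclose(reg.score(X, y), r2_score(y, reg.predict(X)))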
@@ -306,20 +315,20 @@ def test_regressor_pred_contrib(output, client, listen_port):
         output=output
     )
+    params = {
+        "n_estimators": 10,
+        "num_leaves": 10
+    }
 
     dask_regressor = dlgbm.DaskLGBMRegressor(
         time_out=5,
         local_listen_port=listen_port,
         tree_learner='data',
-        n_estimators=10,
-        num_leaves=10
+        **params
     )
     dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
     preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute()
 
-    local_regressor = lightgbm.LGBMRegressor(
-        n_estimators=10,
-        num_leaves=10
-    )
+    local_regressor = lightgbm.LGBMRegressor(**params)
     local_regressor.fit(X, y, sample_weight=w)
     local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True)
@@ -341,26 +350,23 @@ def test_regressor_quantile(output, client, listen_port, alpha):
         output=output
     )
+    params = {
+        "objective": "quantile",
+        "alpha": alpha,
+        "random_state": 42,
+        "n_estimators": 10,
+        "num_leaves": 10
+    }
 
     dask_regressor = dlgbm.DaskLGBMRegressor(
         local_listen_port=listen_port,
-        seed=42,
-        objective='quantile',
-        alpha=alpha,
-        n_estimators=10,
-        num_leaves=10,
-        tree_learner_type='data_parallel'
+        tree_learner_type='data_parallel',
+        **params
     )
     dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
     p1 = dask_regressor.predict(dX).compute()
     q1 = np.count_nonzero(y < p1) / y.shape[0]
 
-    local_regressor = lightgbm.LGBMRegressor(
-        seed=42,
-        objective='quantile',
-        alpha=alpha,
-        n_estimatores=10,
-        num_leaves=10
-    )
+    local_regressor = lightgbm.LGBMRegressor(**params)
     local_regressor.fit(X, y, sample_weight=w)
     p2 = local_regressor.predict(X)
     q2 = np.count_nonzero(y < p2) / y.shape[0]
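q1 and q2 are empirical coverage estimates: with objective='quantile' the model targets the alpha-quantile, so roughly a fraction alpha of the true targets should fall below the predictions, and the test can compare both coverages against alpha and against each other. A purely local sketch of the same check, assuming an illustrative alpha of 0.5:

    import numpy as np
    import lightgbm
    from sklearn.datasets import make_regression

    alpha = 0.5  # illustrative quantile level
    X, y = make_regression(n_samples=500, n_features=5, noise=10.0, random_state=42)

    params = {
        "objective": "quantile",
        "alpha": alpha,
        "random_state": 42,
        "n_estimators": 10,
        "num_leaves": 10
    }

    reg = lightgbm.LGBMRegressor(**params).fit(X, y)
    p = reg.predict(X)

    # Fraction of targets below the predicted alpha-quantile; should be near alpha.
    q = np.count_nonzero(y < p) / y.shape[0]
    print(q)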
@@ -377,7 +383,7 @@ def test_regressor_local_predict(client, listen_port):
     dask_regressor = dlgbm.DaskLGBMRegressor(
         local_listen_port=listen_port,
-        seed=42,
+        random_state=42,
         n_estimators=10,
         num_leaves=10,
         tree_type='data'
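This hunk swaps seed=42 for random_state=42. random_state is the documented scikit-learn-style parameter of the wrapper, while seed reached the booster only as a core-parameter alias passed through **kwargs; both should seed the same RNG, which a quick, hedged check can confirm locally:

    import numpy as np
    import lightgbm
    from sklearn.datasets import make_regression

    X, y = make_regression(n_samples=300, n_features=5, random_state=42)

    reg_a = lightgbm.LGBMRegressor(random_state=42, n_estimators=10, num_leaves=10).fit(X, y)
    reg_b = lightgbm.LGBMRegressor(seed=42, n_estimators=10, num_leaves=10).fit(X, y)  # alias form

    # Expected to match, since 'seed' resolves to the same underlying parameter
    # (LightGBM may warn about the alias).
    print(np.allclose(reg_a.predict(X), reg_b.predict(X)))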
@@ -407,25 +413,23 @@ def test_ranker(output, client, listen_port, group):
     # use many trees + leaves to overfit, help ensure that dask data-parallel strategy matches that of
     # serial learner. See https://github.com/microsoft/LightGBM/issues/3292#issuecomment-671288210.
+    params = {
+        "random_state": 42,
+        "n_estimators": 50,
+        "num_leaves": 20,
+        "min_child_samples": 1
+    }
 
     dask_ranker = dlgbm.DaskLGBMRanker(
         time_out=5,
         local_listen_port=listen_port,
         tree_learner_type='data_parallel',
-        n_estimators=50,
-        num_leaves=20,
-        seed=42,
-        min_child_samples=1
+        **params
     )
     dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client)
     rnkvec_dask = dask_ranker.predict(dX)
     rnkvec_dask = rnkvec_dask.compute()
 
-    local_ranker = lightgbm.LGBMRanker(
-        n_estimators=50,
-        num_leaves=20,
-        seed=42,
-        min_child_samples=1
-    )
+    local_ranker = lightgbm.LGBMRanker(**params)
     local_ranker.fit(X, y, sample_weight=w, group=g)
     rnkvec_local = local_ranker.predict(X)
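Ranker outputs are scores whose absolute values matter less than their ordering, so a natural way to compare the Dask and local predictions is rank correlation rather than exact equality. A hedged local sketch using scipy.stats.spearmanr, with two identically configured local rankers standing in for the distributed/serial pair (the test file's own assertion may differ):

    import numpy as np
    import lightgbm
    from scipy.stats import spearmanr

    rng = np.random.default_rng(42)
    X = rng.normal(size=(100, 5))
    y = rng.integers(0, 4, size=100)   # integer relevance labels
    g = [50, 50]                       # two query groups of 50 documents

    params = {
        "random_state": 42,
        "n_estimators": 50,
        "num_leaves": 20,
        "min_child_samples": 1
    }

    ranker_a = lightgbm.LGBMRanker(**params).fit(X, y, group=g)
    ranker_b = lightgbm.LGBMRanker(**params).fit(X, y, group=g)

    # Identical setup => rank correlation of 1.0; lower values flag divergence.
    print(spearmanr(ranker_a.predict(X), ranker_b.predict(X)).correlation)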
@@ -453,7 +457,7 @@ def test_ranker_local_predict(output, client, listen_port, group):
         tree_learner='data',
         n_estimators=10,
         num_leaves=10,
-        seed=42,
+        random_state=42,
         min_child_samples=1
     )
     dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client)