[dask] factor dask-ml out of tests (fixes #3796) (#3849)

* [dask] factor dask-ml out of tests (fixes #3796) * Update tests/python_package_test/test_dask.py Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

[dask] factor dask-ml out of tests (fixes #3796) (#3849)
* [dask] factor dask-ml out of tests (fixes #3796) * Update tests/python_package_test/test_dask.py Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
0297719c · James Lamb · GitHub · f21e0efc · 0297719c · 0297719c
Unverified Commit 0297719c authored Jan 25, 2021 by James Lamb Committed by GitHub Jan 25, 2021
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 5 deletions

.ci/test.sh .ci/test.sh +1 -1

tests/python_package_test/test_dask.py tests/python_package_test/test_dask.py +13 -4

No files found.
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -87,7 +87,7 @@ if [[ $TASK == "swig" ]]; then
    exit 0
 fi

-conda install -q -y -n $CONDA_ENV dask dask-ml distributed joblib matplotlib numpy pandas psutil pytest scikit-learn scipy
+conda install -q -y -n $CONDA_ENV dask distributed joblib matplotlib numpy pandas psutil pytest scikit-learn scipy

 # graphviz must come from conda-forge to avoid this on some linux distros:
 # https://github.com/conda-forge/graphviz-feedstock/issues/18

--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -19,7 +19,6 @@ import numpy as np
 import pandas as pd
 from scipy.stats import spearmanr
 from dask.array.utils import assert_eq
-from dask_ml.metrics import accuracy_score, r2_score
 from distributed.utils_test import client, cluster_fixture, gen_cluster, loop
 from scipy.sparse import csr_matrix
 from sklearn.datasets import make_blobs, make_regression
@@ -124,6 +123,16 @@ def _create_data(objective, n_samples=100, centers=2, output='array', chunk_size
    return X, y, weights, dX, dy, dw


+def _r2_score(dy_true, dy_pred):  # coefficient of determination R^2 = 1 - SS_res / SS_tot; returns the materialized (computed) value
+    numerator = ((dy_true - dy_pred) ** 2).sum(axis=0, dtype=np.float64)  # SS_res: residual sum of squares, accumulated in float64
+    denominator = ((dy_true - dy_true.mean(axis=0)) ** 2).sum(axis=0, dtype=np.float64)  # SS_tot: centred on the mean of the TRUE targets, not the predictions
+    return (1 - numerator / denominator).compute()  # the expression is lazy until .compute() is called
+
+
+def _accuracy_score(dy_true, dy_pred):  # share of positions where dy_pred equals dy_true (presumably dask collections -- confirm against callers)
+    return da.average(dy_true == dy_pred).compute()  # unweighted average of the equality mask; .compute() materializes the result
+
+
 @pytest.mark.parametrize('output', data_output)
 @pytest.mark.parametrize('centers', data_centers)
 def test_classifier(output, centers, client, listen_port):
@@ -145,7 +154,7 @@ def test_classifier(output, centers, client, listen_port):
    dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
    p1 = dask_classifier.predict(dX)
    p1_proba = dask_classifier.predict_proba(dX).compute()
-    s1 = accuracy_score(dy, p1)
+    s1 = _accuracy_score(dy, p1)
    p1 = p1.compute()

    local_classifier = lgb.LGBMClassifier(**params)
@@ -289,7 +298,7 @@ def test_regressor(output, client, listen_port):
    dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
    p1 = dask_regressor.predict(dX)
    if output != 'dataframe':
-        s1 = r2_score(dy, p1)
+        s1 = _r2_score(dy, p1)
    p1 = p1.compute()

    local_regressor = lgb.LGBMRegressor(**params)
@@ -391,7 +400,7 @@ def test_regressor_local_predict(client, listen_port):
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
    p1 = dask_regressor.predict(dX)
    p2 = dask_regressor.to_local().predict(X)
-    s1 = r2_score(dy, p1)
+    s1 = _r2_score(dy, p1)
    p1 = p1.compute()
    s2 = dask_regressor.to_local().score(X, y)