Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
d47006f4
Unverified
Commit
d47006f4
authored
May 16, 2023
by
James Lamb
Committed by
GitHub
May 15, 2023
Browse files
[ci] [python-package] use ruff, enforce flake8-bugbear and flake8-comprehensions checks (#5871)
parent
452370ac
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
119 additions
and
78 deletions
+119
-78
.ci/lint-python.sh
.ci/lint-python.sh
+3
-10
.ci/test.sh
.ci/test.sh
+2
-3
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+10
-11
python-package/lightgbm/callback.py
python-package/lightgbm/callback.py
+1
-1
python-package/lightgbm/dask.py
python-package/lightgbm/dask.py
+1
-1
python-package/pyproject.toml
python-package/pyproject.toml
+47
-8
tests/distributed/_test_distributed.py
tests/distributed/_test_distributed.py
+4
-4
tests/python_package_test/test_basic.py
tests/python_package_test/test_basic.py
+10
-9
tests/python_package_test/test_dask.py
tests/python_package_test/test_dask.py
+2
-2
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+10
-10
tests/python_package_test/test_sklearn.py
tests/python_package_test/test_sklearn.py
+29
-19
No files found.
.ci/lint-python.sh
View file @
d47006f4
#!/bin/sh
#!/bin/sh
echo
"running flake8"
echo
"running ruff"
flake8
\
ruff check
\
--config
=
./python-package/setup.cfg
\
.
\
||
exit
-1
echo
"done running flake8"
echo
"running pydocstyle"
pydocstyle
\
--config
=
./python-package/pyproject.toml
\
--config
=
./python-package/pyproject.toml
\
.
\
.
\
||
exit
-1
||
exit
-1
echo
"done running
pydocstyle
"
echo
"done running
ruff
"
echo
"running isort"
echo
"running isort"
isort
\
isort
\
...
...
.ci/test.sh
View file @
d47006f4
...
@@ -71,11 +71,10 @@ if [[ $TASK == "lint" ]]; then
...
@@ -71,11 +71,10 @@ if [[ $TASK == "lint" ]]; then
${
CONDA_PYTHON_REQUIREMENT
}
\
${
CONDA_PYTHON_REQUIREMENT
}
\
cmakelint
\
cmakelint
\
cpplint
\
cpplint
\
flake8
\
isort
\
isort
\
mypy
\
mypy
\
pydocstyle
\
'r-lintr>=3.0'
\
"r-lintr>=3.0"
ruff
source
activate
$CONDA_ENV
source
activate
$CONDA_ENV
echo
"Linting Python code"
echo
"Linting Python code"
sh
${
BUILD_DIRECTORY
}
/.ci/lint-python.sh
||
exit
-1
sh
${
BUILD_DIRECTORY
}
/.ci/lint-python.sh
||
exit
-1
...
...
python-package/lightgbm/basic.py
View file @
d47006f4
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
"""Wrapper for C API of LightGBM."""
"""Wrapper for C API of LightGBM."""
import
abc
import
abc
import
ctypes
import
ctypes
import
inspect
import
json
import
json
import
warnings
import
warnings
from
collections
import
OrderedDict
from
collections
import
OrderedDict
...
@@ -996,8 +997,8 @@ class _InnerPredictor:
...
@@ -996,8 +997,8 @@ class _InnerPredictor:
elif
isinstance
(
data
,
list
):
elif
isinstance
(
data
,
list
):
try
:
try
:
data
=
np
.
array
(
data
)
data
=
np
.
array
(
data
)
except
BaseException
:
except
BaseException
as
err
:
raise
ValueError
(
'Cannot convert data list to numpy array.'
)
raise
ValueError
(
'Cannot convert data list to numpy array.'
)
from
err
preds
,
nrow
=
self
.
__pred_for_np2d
(
preds
,
nrow
=
self
.
__pred_for_np2d
(
mat
=
data
,
mat
=
data
,
start_iteration
=
start_iteration
,
start_iteration
=
start_iteration
,
...
@@ -1015,8 +1016,8 @@ class _InnerPredictor:
...
@@ -1015,8 +1016,8 @@ class _InnerPredictor:
try
:
try
:
_log_warning
(
'Converting data to scipy sparse matrix.'
)
_log_warning
(
'Converting data to scipy sparse matrix.'
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
except
BaseException
:
except
BaseException
as
err
:
raise
TypeError
(
f
'Cannot predict data for type
{
type
(
data
).
__name__
}
'
)
raise
TypeError
(
f
'Cannot predict data for type
{
type
(
data
).
__name__
}
'
)
from
err
preds
,
nrow
=
self
.
__pred_for_csr
(
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
=
csr
,
csr
=
csr
,
start_iteration
=
start_iteration
,
start_iteration
=
start_iteration
,
...
@@ -1802,9 +1803,7 @@ class Dataset:
...
@@ -1802,9 +1803,7 @@ class Dataset:
# process for args
# process for args
params
=
{}
if
params
is
None
else
params
params
=
{}
if
params
is
None
else
params
args_names
=
(
getattr
(
self
.
__class__
,
'_lazy_init'
)
args_names
=
inspect
.
signature
(
self
.
__class__
.
_lazy_init
).
parameters
.
keys
()
.
__code__
.
co_varnames
[:
getattr
(
self
.
__class__
,
'_lazy_init'
).
__code__
.
co_argcount
])
for
key
in
params
.
keys
():
for
key
in
params
.
keys
():
if
key
in
args_names
:
if
key
in
args_names
:
_log_warning
(
f
'
{
key
}
keyword has been found in `params` and will be ignored.
\n
'
_log_warning
(
f
'
{
key
}
keyword has been found in `params` and will be ignored.
\n
'
...
@@ -1868,8 +1867,8 @@ class Dataset:
...
@@ -1868,8 +1867,8 @@ class Dataset:
try
:
try
:
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
self
.
__init_from_csr
(
csr
,
params_str
,
ref_dataset
)
self
.
__init_from_csr
(
csr
,
params_str
,
ref_dataset
)
except
BaseException
:
except
BaseException
as
err
:
raise
TypeError
(
f
'Cannot initialize Dataset from
{
type
(
data
).
__name__
}
'
)
raise
TypeError
(
f
'Cannot initialize Dataset from
{
type
(
data
).
__name__
}
'
)
from
err
if
label
is
not
None
:
if
label
is
not
None
:
self
.
set_label
(
label
)
self
.
set_label
(
label
)
if
self
.
get_label
()
is
None
:
if
self
.
get_label
()
is
None
:
...
@@ -1920,7 +1919,7 @@ class Dataset:
...
@@ -1920,7 +1919,7 @@ class Dataset:
indices
=
self
.
_create_sample_indices
(
total_nrow
)
indices
=
self
.
_create_sample_indices
(
total_nrow
)
# Select sampled rows, transpose to column order.
# Select sampled rows, transpose to column order.
sampled
=
np
.
array
(
[
row
for
row
in
self
.
_yield_row_from_seqlist
(
seqs
,
indices
)
]
)
sampled
=
np
.
array
(
list
(
self
.
_yield_row_from_seqlist
(
seqs
,
indices
)
)
)
sampled
=
sampled
.
T
sampled
=
sampled
.
T
filtered
=
[]
filtered
=
[]
...
@@ -2777,7 +2776,7 @@ class Dataset:
...
@@ -2777,7 +2776,7 @@ class Dataset:
elif
isinstance
(
self
.
data
,
Sequence
):
elif
isinstance
(
self
.
data
,
Sequence
):
self
.
data
=
self
.
data
[
self
.
used_indices
]
self
.
data
=
self
.
data
[
self
.
used_indices
]
elif
isinstance
(
self
.
data
,
list
)
and
len
(
self
.
data
)
>
0
and
all
(
isinstance
(
x
,
Sequence
)
for
x
in
self
.
data
):
elif
isinstance
(
self
.
data
,
list
)
and
len
(
self
.
data
)
>
0
and
all
(
isinstance
(
x
,
Sequence
)
for
x
in
self
.
data
):
self
.
data
=
np
.
array
(
[
row
for
row
in
self
.
_yield_row_from_seqlist
(
self
.
data
,
self
.
used_indices
)
]
)
self
.
data
=
np
.
array
(
list
(
self
.
_yield_row_from_seqlist
(
self
.
data
,
self
.
used_indices
)
)
)
else
:
else
:
_log_warning
(
f
"Cannot subset
{
type
(
self
.
data
).
__name__
}
type of raw data.
\n
"
_log_warning
(
f
"Cannot subset
{
type
(
self
.
data
).
__name__
}
type of raw data.
\n
"
"Returning original raw data"
)
"Returning original raw data"
)
...
...
python-package/lightgbm/callback.py
View file @
d47006f4
...
@@ -301,7 +301,7 @@ class _EarlyStoppingCallback:
...
@@ -301,7 +301,7 @@ class _EarlyStoppingCallback:
self
.
_reset_storages
()
self
.
_reset_storages
()
n_metrics
=
len
(
set
(
m
[
1
]
for
m
in
env
.
evaluation_result_list
)
)
n_metrics
=
len
(
{
m
[
1
]
for
m
in
env
.
evaluation_result_list
}
)
n_datasets
=
len
(
env
.
evaluation_result_list
)
//
n_metrics
n_datasets
=
len
(
env
.
evaluation_result_list
)
//
n_metrics
if
isinstance
(
self
.
min_delta
,
list
):
if
isinstance
(
self
.
min_delta
,
list
):
if
not
all
(
t
>=
0
for
t
in
self
.
min_delta
):
if
not
all
(
t
>=
0
for
t
in
self
.
min_delta
):
...
...
python-package/lightgbm/dask.py
View file @
d47006f4
...
@@ -787,7 +787,7 @@ def _train(
...
@@ -787,7 +787,7 @@ def _train(
else
:
else
:
if
listen_port_in_params
:
if
listen_port_in_params
:
_log_info
(
"Using passed-in 'local_listen_port' for all workers"
)
_log_info
(
"Using passed-in 'local_listen_port' for all workers"
)
unique_hosts
=
set
(
urlparse
(
a
).
hostname
for
a
in
worker_addresses
)
unique_hosts
=
{
urlparse
(
a
).
hostname
for
a
in
worker_addresses
}
if
len
(
unique_hosts
)
<
len
(
worker_addresses
):
if
len
(
unique_hosts
)
<
len
(
worker_addresses
):
msg
=
(
msg
=
(
"'local_listen_port' was provided in Dask training parameters, but at least one "
"'local_listen_port' was provided in Dask training parameters, but at least one "
...
...
python-package/pyproject.toml
View file @
d47006f4
[tool.isort]
[tool.isort]
line_length
=
120
line_length
=
120
skip_glob
=
[
skip_glob
=
[
"external_libs/*"
,
"
*/
external_libs/*"
,
"lightgbm-python/*"
"
*/
lightgbm-python/*"
]
]
[tool.mypy]
[tool.mypy]
exclude
=
'build/*|compile/*|docs/*|examples/*|external_libs/*|lightgbm-python/*|tests/*'
exclude
=
'build/*|compile/*|docs/*|examples/*|external_libs/*|lightgbm-python/*|tests/*'
ignore_missing_imports
=
true
ignore_missing_imports
=
true
[tool.pydocstyle]
[tool.ruff]
add_ignore
=
[
exclude
=
[
'D105'
"build"
,
"compile"
,
"docs"
,
"external_libs"
,
"lightgbm-python"
,
"setup.py"
]
]
convention
=
'numpy'
ignore
=
[
match
=
'(?!^test_|setup).*\.py'
# (pydocstyle) Missing docstring in magic method
match_dir
=
'^(?!^external_libs|lightgbm-python|test|example).*'
"D105"
,
# (pycodestyle) Line too long
"E501"
]
select
=
[
# flake8-bugbear
"B"
,
# flake8-comprehensions
"C4"
,
# pydocstyle
"D"
,
# pycodestyle
"E"
,
# pyflakes
"F"
]
# this should be set to the oldest version of python LightGBM supports
target-version
=
"py37"
[tool.ruff.per-file-ignores]
"examples/*"
=
[
# pydocstyle
"D"
]
"tests/*"
=
[
# (flake8-bugbear) Found useless expression
"B018"
,
# pydocstyle
"D"
]
[tool.ruff.pydocstyle]
convention
=
"numpy"
tests/distributed/_test_distributed.py
View file @
d47006f4
...
@@ -106,7 +106,7 @@ class DistributedMockup:
...
@@ -106,7 +106,7 @@ class DistributedMockup:
for
i
,
partition
in
enumerate
(
partitions
):
for
i
,
partition
in
enumerate
(
partitions
):
np
.
savetxt
(
str
(
TESTS_DIR
/
f
'train
{
i
}
.txt'
),
partition
,
delimiter
=
','
)
np
.
savetxt
(
str
(
TESTS_DIR
/
f
'train
{
i
}
.txt'
),
partition
,
delimiter
=
','
)
def
fit
(
self
,
partitions
:
List
[
np
.
ndarray
],
train_config
:
Dict
=
{}
)
->
None
:
def
fit
(
self
,
partitions
:
List
[
np
.
ndarray
],
train_config
:
Dict
)
->
None
:
"""Run the distributed training process on a single machine.
"""Run the distributed training process on a single machine.
For each worker i:
For each worker i:
...
@@ -134,7 +134,7 @@ class DistributedMockup:
...
@@ -134,7 +134,7 @@ class DistributedMockup:
if
result
.
returncode
!=
0
:
if
result
.
returncode
!=
0
:
raise
RuntimeError
(
'Error in training'
)
raise
RuntimeError
(
'Error in training'
)
def
predict
(
self
,
predict_config
:
Dict
[
str
,
Any
]
=
{}
)
->
np
.
ndarray
:
def
predict
(
self
,
predict_config
:
Dict
[
str
,
Any
])
->
np
.
ndarray
:
"""Compute the predictions using the model created in the fit step.
"""Compute the predictions using the model created in the fit step.
predict_config is used to predict the training set train.txt
predict_config is used to predict the training set train.txt
...
@@ -178,7 +178,7 @@ def test_classifier(executable):
...
@@ -178,7 +178,7 @@ def test_classifier(executable):
}
}
clf
=
DistributedMockup
(
executable
)
clf
=
DistributedMockup
(
executable
)
clf
.
fit
(
partitions
,
train_params
)
clf
.
fit
(
partitions
,
train_params
)
y_probas
=
clf
.
predict
()
y_probas
=
clf
.
predict
(
predict_config
=
{}
)
y_pred
=
y_probas
>
0.5
y_pred
=
y_probas
>
0.5
assert
accuracy_score
(
clf
.
label_
,
y_pred
)
==
1.
assert
accuracy_score
(
clf
.
label_
,
y_pred
)
==
1.
...
@@ -194,5 +194,5 @@ def test_regressor(executable):
...
@@ -194,5 +194,5 @@ def test_regressor(executable):
}
}
reg
=
DistributedMockup
(
executable
)
reg
=
DistributedMockup
(
executable
)
reg
.
fit
(
partitions
,
train_params
)
reg
.
fit
(
partitions
,
train_params
)
y_pred
=
reg
.
predict
()
y_pred
=
reg
.
predict
(
predict_config
=
{}
)
np
.
testing
.
assert_allclose
(
y_pred
,
reg
.
label_
,
rtol
=
0.2
,
atol
=
50.
)
np
.
testing
.
assert_allclose
(
y_pred
,
reg
.
label_
,
rtol
=
0.2
,
atol
=
50.
)
tests/python_package_test/test_basic.py
View file @
d47006f4
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
import
filecmp
import
filecmp
import
numbers
import
numbers
import
re
import
re
from
copy
import
deepcopy
from
os
import
getenv
from
os
import
getenv
from
pathlib
import
Path
from
pathlib
import
Path
...
@@ -324,7 +325,7 @@ def test_add_features_same_booster_behaviour(tmp_path):
...
@@ -324,7 +325,7 @@ def test_add_features_same_booster_behaviour(tmp_path):
d
.
set_label
(
y
)
d
.
set_label
(
y
)
b1
=
lgb
.
Booster
(
train_set
=
d1
)
b1
=
lgb
.
Booster
(
train_set
=
d1
)
b
=
lgb
.
Booster
(
train_set
=
d
)
b
=
lgb
.
Booster
(
train_set
=
d
)
for
k
in
range
(
10
):
for
_
in
range
(
10
):
b
.
update
()
b
.
update
()
b1
.
update
()
b1
.
update
()
dname
=
tmp_path
/
"d.txt"
dname
=
tmp_path
/
"d.txt"
...
@@ -365,7 +366,7 @@ def test_add_features_from_different_sources():
...
@@ -365,7 +366,7 @@ def test_add_features_from_different_sources():
# test that method works for different data types
# test that method works for different data types
d1
=
lgb
.
Dataset
(
x_1
,
feature_name
=
names
,
free_raw_data
=
False
).
construct
()
d1
=
lgb
.
Dataset
(
x_1
,
feature_name
=
names
,
free_raw_data
=
False
).
construct
()
res_feature_names
=
[
name
for
name
in
names
]
res_feature_names
=
deepcopy
(
names
)
for
idx
,
x_2
in
enumerate
(
xxs
,
2
):
for
idx
,
x_2
in
enumerate
(
xxs
,
2
):
original_type
=
type
(
d1
.
get_data
())
original_type
=
type
(
d1
.
get_data
())
d2
=
lgb
.
Dataset
(
x_2
,
feature_name
=
names
,
free_raw_data
=
False
).
construct
()
d2
=
lgb
.
Dataset
(
x_2
,
feature_name
=
names
,
free_raw_data
=
False
).
construct
()
...
@@ -407,7 +408,7 @@ def test_cegb_affects_behavior(tmp_path):
...
@@ -407,7 +408,7 @@ def test_cegb_affects_behavior(tmp_path):
ds
=
lgb
.
Dataset
(
X
,
feature_name
=
names
).
construct
()
ds
=
lgb
.
Dataset
(
X
,
feature_name
=
names
).
construct
()
ds
.
set_label
(
y
)
ds
.
set_label
(
y
)
base
=
lgb
.
Booster
(
train_set
=
ds
)
base
=
lgb
.
Booster
(
train_set
=
ds
)
for
k
in
range
(
10
):
for
_
in
range
(
10
):
base
.
update
()
base
.
update
()
basename
=
tmp_path
/
"basename.txt"
basename
=
tmp_path
/
"basename.txt"
base
.
save_model
(
basename
)
base
.
save_model
(
basename
)
...
@@ -419,7 +420,7 @@ def test_cegb_affects_behavior(tmp_path):
...
@@ -419,7 +420,7 @@ def test_cegb_affects_behavior(tmp_path):
{
'cegb_penalty_split'
:
1
}]
{
'cegb_penalty_split'
:
1
}]
for
case
in
cases
:
for
case
in
cases
:
booster
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
case
)
booster
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
case
)
for
k
in
range
(
10
):
for
_
in
range
(
10
):
booster
.
update
()
booster
.
update
()
casename
=
tmp_path
/
"casename.txt"
casename
=
tmp_path
/
"casename.txt"
booster
.
save_model
(
casename
)
booster
.
save_model
(
casename
)
...
@@ -445,7 +446,7 @@ def test_cegb_scaling_equalities(tmp_path):
...
@@ -445,7 +446,7 @@ def test_cegb_scaling_equalities(tmp_path):
for
(
p1
,
p2
)
in
pairs
:
for
(
p1
,
p2
)
in
pairs
:
booster1
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
p1
)
booster1
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
p1
)
booster2
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
p2
)
booster2
=
lgb
.
Booster
(
train_set
=
ds
,
params
=
p2
)
for
k
in
range
(
10
):
for
_
in
range
(
10
):
booster1
.
update
()
booster1
.
update
()
booster2
.
update
()
booster2
.
update
()
p1name
=
tmp_path
/
"p1.txt"
p1name
=
tmp_path
/
"p1.txt"
...
@@ -752,10 +753,10 @@ def test_feature_num_bin(min_data_in_bin):
...
@@ -752,10 +753,10 @@ def test_feature_num_bin(min_data_in_bin):
]).
T
]).
T
n_continuous
=
X
.
shape
[
1
]
-
1
n_continuous
=
X
.
shape
[
1
]
-
1
feature_name
=
[
f
'x
{
i
}
'
for
i
in
range
(
n_continuous
)]
+
[
'cat1'
]
feature_name
=
[
f
'x
{
i
}
'
for
i
in
range
(
n_continuous
)]
+
[
'cat1'
]
ds_kwargs
=
dict
(
ds_kwargs
=
{
params
=
{
'min_data_in_bin'
:
min_data_in_bin
},
"
params
"
:
{
'min_data_in_bin'
:
min_data_in_bin
},
categorical_feature
=
[
n_continuous
],
# last feature
"
categorical_feature
"
:
[
n_continuous
],
# last feature
)
}
ds
=
lgb
.
Dataset
(
X
,
feature_name
=
feature_name
,
**
ds_kwargs
).
construct
()
ds
=
lgb
.
Dataset
(
X
,
feature_name
=
feature_name
,
**
ds_kwargs
).
construct
()
expected_num_bins
=
[
expected_num_bins
=
[
100
//
min_data_in_bin
+
1
,
# extra bin for zero
100
//
min_data_in_bin
+
1
,
# extra bin for zero
...
...
tests/python_package_test/test_dask.py
View file @
d47006f4
...
@@ -1062,9 +1062,9 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix,
...
@@ -1062,9 +1062,9 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix,
eval_class_weight
.
append
({
0
:
n_neg
/
n_pos
,
1
:
n_pos
/
n_neg
})
eval_class_weight
.
append
({
0
:
n_neg
/
n_pos
,
1
:
n_pos
/
n_neg
})
init_score_value
=
np
.
log
(
np
.
mean
(
y_e
)
/
(
1
-
np
.
mean
(
y_e
)))
init_score_value
=
np
.
log
(
np
.
mean
(
y_e
)
/
(
1
-
np
.
mean
(
y_e
)))
if
'dataframe'
in
output
:
if
'dataframe'
in
output
:
d_init_score
=
dy_e
.
map_partitions
(
lambda
x
:
pd
.
Series
([
init_score_value
]
*
x
.
size
))
d_init_score
=
dy_e
.
map_partitions
(
lambda
x
,
val
=
init_score_value
:
pd
.
Series
([
val
]
*
x
.
size
))
else
:
else
:
d_init_score
=
dy_e
.
map_blocks
(
lambda
x
:
np
.
repeat
(
init_score_value
,
x
.
size
))
d_init_score
=
dy_e
.
map_blocks
(
lambda
x
,
val
=
init_score_value
:
np
.
repeat
(
val
,
x
.
size
))
eval_init_score
.
append
(
d_init_score
)
eval_init_score
.
append
(
d_init_score
)
...
...
tests/python_package_test/test_engine.py
View file @
d47006f4
...
@@ -886,13 +886,13 @@ def test_early_stopping_min_delta(first_only, single_metric, greater_is_better):
...
@@ -886,13 +886,13 @@ def test_early_stopping_min_delta(first_only, single_metric, greater_is_better):
min_delta
=
metric2min_delta
[
metric
[
0
]]
min_delta
=
metric2min_delta
[
metric
[
0
]]
else
:
else
:
min_delta
=
[
metric2min_delta
[
m
]
for
m
in
metric
]
min_delta
=
[
metric2min_delta
[
m
]
for
m
in
metric
]
train_kwargs
=
dict
(
train_kwargs
=
{
params
=
params
,
"
params
"
:
params
,
train_set
=
train_ds
,
"
train_set
"
:
train_ds
,
num_boost_round
=
50
,
"
num_boost_round
"
:
50
,
valid_sets
=
[
train_ds
,
valid_ds
],
"
valid_sets
"
:
[
train_ds
,
valid_ds
],
valid_names
=
[
'training'
,
'valid'
],
"
valid_names
"
:
[
'training'
,
'valid'
],
)
}
# regular early stopping
# regular early stopping
evals_result
=
{}
evals_result
=
{}
...
@@ -1771,7 +1771,7 @@ def test_monotone_constraints(test_with_categorical_variable):
...
@@ -1771,7 +1771,7 @@ def test_monotone_constraints(test_with_categorical_variable):
for
tree
in
tree_str
:
for
tree
in
tree_str
:
# split_features are in 4th line.
# split_features are in 4th line.
features
=
tree
.
splitlines
()[
3
].
split
(
"="
)[
1
].
split
(
" "
)
features
=
tree
.
splitlines
()[
3
].
split
(
"="
)[
1
].
split
(
" "
)
features
=
set
(
f
"Column_
{
f
}
"
for
f
in
features
)
features
=
{
f
"Column_
{
f
}
"
for
f
in
features
}
feature_sets
.
append
(
features
)
feature_sets
.
append
(
features
)
return
np
.
array
(
feature_sets
)
return
np
.
array
(
feature_sets
)
...
@@ -2860,14 +2860,14 @@ def test_early_stopping_for_only_first_metric():
...
@@ -2860,14 +2860,14 @@ def test_early_stopping_for_only_first_metric():
iter_valid1_l2
=
3
iter_valid1_l2
=
3
iter_valid2_l1
=
3
iter_valid2_l1
=
3
iter_valid2_l2
=
15
iter_valid2_l2
=
15
assert
len
(
set
([
iter_valid1_l1
,
iter_valid1_l2
,
iter_valid2_l1
,
iter_valid2_l2
])
)
==
2
assert
len
(
{
iter_valid1_l1
,
iter_valid1_l2
,
iter_valid2_l1
,
iter_valid2_l2
}
)
==
2
iter_min_l1
=
min
([
iter_valid1_l1
,
iter_valid2_l1
])
iter_min_l1
=
min
([
iter_valid1_l1
,
iter_valid2_l1
])
iter_min_l2
=
min
([
iter_valid1_l2
,
iter_valid2_l2
])
iter_min_l2
=
min
([
iter_valid1_l2
,
iter_valid2_l2
])
iter_min_valid1
=
min
([
iter_valid1_l1
,
iter_valid1_l2
])
iter_min_valid1
=
min
([
iter_valid1_l1
,
iter_valid1_l2
])
iter_cv_l1
=
15
iter_cv_l1
=
15
iter_cv_l2
=
13
iter_cv_l2
=
13
assert
len
(
set
([
iter_cv_l1
,
iter_cv_l2
])
)
==
2
assert
len
(
{
iter_cv_l1
,
iter_cv_l2
}
)
==
2
iter_cv_min
=
min
([
iter_cv_l1
,
iter_cv_l2
])
iter_cv_min
=
min
([
iter_cv_l1
,
iter_cv_l2
])
# test for lgb.train
# test for lgb.train
...
...
tests/python_package_test/test_sklearn.py
View file @
d47006f4
...
@@ -313,20 +313,24 @@ def test_grid_search():
...
@@ -313,20 +313,24 @@ def test_grid_search():
y
=
y
.
astype
(
str
)
# utilize label encoder at it's max power
y
=
y
.
astype
(
str
)
# utilize label encoder at it's max power
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
X_train
,
X_val
,
y_train
,
y_val
=
train_test_split
(
X_train
,
y_train
,
test_size
=
0.1
,
random_state
=
42
)
X_train
,
X_val
,
y_train
,
y_val
=
train_test_split
(
X_train
,
y_train
,
test_size
=
0.1
,
random_state
=
42
)
params
=
dict
(
subsample
=
0.8
,
params
=
{
subsample_freq
=
1
)
"subsample"
:
0.8
,
grid_params
=
dict
(
boosting_type
=
[
'rf'
,
'gbdt'
],
"subsample_freq"
:
1
n_estimators
=
[
4
,
6
],
}
reg_alpha
=
[
0.01
,
0.005
])
grid_params
=
{
"boosting_type"
:
[
'rf'
,
'gbdt'
],
"n_estimators"
:
[
4
,
6
],
"reg_alpha"
:
[
0.01
,
0.005
]
}
evals_result
=
{}
evals_result
=
{}
fit_params
=
dict
(
fit_params
=
{
eval_set
=
[(
X_val
,
y_val
)],
"
eval_set
"
:
[(
X_val
,
y_val
)],
eval_metric
=
constant_metric
,
"
eval_metric
"
:
constant_metric
,
callbacks
=
[
"
callbacks
"
:
[
lgb
.
early_stopping
(
2
),
lgb
.
early_stopping
(
2
),
lgb
.
record_evaluation
(
evals_result
)
lgb
.
record_evaluation
(
evals_result
)
]
]
)
}
grid
=
GridSearchCV
(
estimator
=
lgb
.
LGBMClassifier
(
**
params
),
param_grid
=
grid_params
,
cv
=
2
)
grid
=
GridSearchCV
(
estimator
=
lgb
.
LGBMClassifier
(
**
params
),
param_grid
=
grid_params
,
cv
=
2
)
grid
.
fit
(
X_train
,
y_train
,
**
fit_params
)
grid
.
fit
(
X_train
,
y_train
,
**
fit_params
)
score
=
grid
.
score
(
X_test
,
y_test
)
# utilizes GridSearchCV default refit=True
score
=
grid
.
score
(
X_test
,
y_test
)
# utilizes GridSearchCV default refit=True
...
@@ -350,14 +354,20 @@ def test_random_search():
...
@@ -350,14 +354,20 @@ def test_random_search():
X_train
,
X_val
,
y_train
,
y_val
=
train_test_split
(
X_train
,
y_train
,
test_size
=
0.1
,
X_train
,
X_val
,
y_train
,
y_val
=
train_test_split
(
X_train
,
y_train
,
test_size
=
0.1
,
random_state
=
42
)
random_state
=
42
)
n_iter
=
3
# Number of samples
n_iter
=
3
# Number of samples
params
=
dict
(
subsample
=
0.8
,
params
=
{
subsample_freq
=
1
)
"subsample"
:
0.8
,
param_dist
=
dict
(
boosting_type
=
[
'rf'
,
'gbdt'
],
"subsample_freq"
:
1
n_estimators
=
[
np
.
random
.
randint
(
low
=
3
,
high
=
10
)
for
i
in
range
(
n_iter
)],
}
reg_alpha
=
[
np
.
random
.
uniform
(
low
=
0.01
,
high
=
0.06
)
for
i
in
range
(
n_iter
)])
param_dist
=
{
fit_params
=
dict
(
eval_set
=
[(
X_val
,
y_val
)],
"boosting_type"
:
[
'rf'
,
'gbdt'
],
eval_metric
=
constant_metric
,
"n_estimators"
:
[
np
.
random
.
randint
(
low
=
3
,
high
=
10
)
for
i
in
range
(
n_iter
)],
callbacks
=
[
lgb
.
early_stopping
(
2
)])
"reg_alpha"
:
[
np
.
random
.
uniform
(
low
=
0.01
,
high
=
0.06
)
for
i
in
range
(
n_iter
)]
}
fit_params
=
{
"eval_set"
:
[(
X_val
,
y_val
)],
"eval_metric"
:
constant_metric
,
"callbacks"
:
[
lgb
.
early_stopping
(
2
)]
}
rand
=
RandomizedSearchCV
(
estimator
=
lgb
.
LGBMClassifier
(
**
params
),
rand
=
RandomizedSearchCV
(
estimator
=
lgb
.
LGBMClassifier
(
**
params
),
param_distributions
=
param_dist
,
cv
=
2
,
param_distributions
=
param_dist
,
cv
=
2
,
n_iter
=
n_iter
,
random_state
=
42
)
n_iter
=
n_iter
,
random_state
=
42
)
...
@@ -1139,7 +1149,7 @@ def test_first_metric_only():
...
@@ -1139,7 +1149,7 @@ def test_first_metric_only():
iter_valid1_l2
=
4
iter_valid1_l2
=
4
iter_valid2_l1
=
2
iter_valid2_l1
=
2
iter_valid2_l2
=
2
iter_valid2_l2
=
2
assert
len
(
set
([
iter_valid1_l1
,
iter_valid1_l2
,
iter_valid2_l1
,
iter_valid2_l2
])
)
==
2
assert
len
(
{
iter_valid1_l1
,
iter_valid1_l2
,
iter_valid2_l1
,
iter_valid2_l2
}
)
==
2
iter_min_l1
=
min
([
iter_valid1_l1
,
iter_valid2_l1
])
iter_min_l1
=
min
([
iter_valid1_l1
,
iter_valid2_l1
])
iter_min_l2
=
min
([
iter_valid1_l2
,
iter_valid2_l2
])
iter_min_l2
=
min
([
iter_valid1_l2
,
iter_valid2_l2
])
iter_min
=
min
([
iter_min_l1
,
iter_min_l2
])
iter_min
=
min
([
iter_min_l1
,
iter_min_l2
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment