Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
a39141e1
Commit
a39141e1
authored
May 02, 2017
by
wxchan
Committed by
Guolin Ke
May 02, 2017
Browse files
re-write test cases: remove global template (#479)
parent
89c69987
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
186 additions
and
122 deletions
+186
-122
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+122
-78
tests/python_package_test/test_sklearn.py
tests/python_package_test/test_sklearn.py
+64
-44
No files found.
tests/python_package_test/test_engine.py
View file @
a39141e1
...
@@ -28,72 +28,76 @@ def multi_logloss(y_true, y_pred):
...
@@ -28,72 +28,76 @@ def multi_logloss(y_true, y_pred):
return
np
.
mean
([
-
math
.
log
(
y_pred
[
i
][
y
])
for
i
,
y
in
enumerate
(
y_true
)])
return
np
.
mean
([
-
math
.
log
(
y_pred
[
i
][
y
])
for
i
,
y
in
enumerate
(
y_true
)])
class
template
(
object
):
@
staticmethod
def
test_template
(
params
=
{
'objective'
:
'regression'
,
'metric'
:
'l2'
},
X_y
=
load_boston
(
True
),
feval
=
mean_squared_error
,
num_round
=
50
,
init_model
=
None
,
custom_eval
=
None
,
early_stopping_rounds
=
2
,
return_data
=
False
,
return_model
=
False
):
params
[
'verbose'
],
params
[
'seed'
]
=
-
1
,
42
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
*
X_y
,
test_size
=
0.1
,
random_state
=
42
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
params
=
params
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
params
=
params
)
if
return_data
:
return
lgb_train
,
lgb_eval
evals_result
=
{}
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
num_round
,
valid_sets
=
lgb_eval
,
valid_names
=
'eval'
,
verbose_eval
=
False
,
feval
=
custom_eval
,
evals_result
=
evals_result
,
early_stopping_rounds
=
early_stopping_rounds
,
init_model
=
init_model
)
if
return_model
:
return
gbm
else
:
return
evals_result
,
feval
(
y_test
,
gbm
.
predict
(
X_test
,
gbm
.
best_iteration
))
class
TestEngine
(
unittest
.
TestCase
):
class
TestEngine
(
unittest
.
TestCase
):
def
test_binary
(
self
):
def
test_binary
(
self
):
X_y
=
load_breast_cancer
(
True
)
X
,
y
=
load_breast_cancer
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
params
=
{
'objective'
:
'binary'
,
'objective'
:
'binary'
,
'metric'
:
'binary_logloss'
'metric'
:
'binary_logloss'
,
'verbose'
:
-
1
}
}
evals_result
,
ret
=
template
.
test_template
(
params
,
X_y
,
log_loss
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
evals_result
=
{}
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
50
,
valid_sets
=
lgb_eval
,
verbose_eval
=
False
,
evals_result
=
evals_result
)
ret
=
log_loss
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
0.15
)
self
.
assertLess
(
ret
,
0.15
)
self
.
assertAlmostEqual
(
min
(
evals_result
[
'
e
val'
][
'binary_logloss'
]
)
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
evals_result
[
'val
id_0
'
][
'binary_logloss'
]
[
-
1
]
,
ret
,
places
=
5
)
def
test_regreesion
(
self
):
def
test_regreesion
(
self
):
evals_result
,
ret
=
template
.
test_template
()
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
'metric'
:
'l2'
,
'verbose'
:
-
1
}
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
evals_result
=
{}
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
50
,
valid_sets
=
lgb_eval
,
verbose_eval
=
False
,
evals_result
=
evals_result
)
ret
=
mean_squared_error
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
16
)
self
.
assertLess
(
ret
,
16
)
self
.
assertAlmostEqual
(
min
(
evals_result
[
'
e
val'
][
'l2'
]
)
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
evals_result
[
'val
id_0
'
][
'l2'
]
[
-
1
]
,
ret
,
places
=
5
)
def
test_multiclass
(
self
):
def
test_multiclass
(
self
):
X_y
=
load_digits
(
10
,
True
)
X
,
y
=
load_digits
(
10
,
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
params
=
{
'objective'
:
'multiclass'
,
'objective'
:
'multiclass'
,
'metric'
:
'multi_logloss'
,
'metric'
:
'multi_logloss'
,
'num_class'
:
10
'num_class'
:
10
,
'verbose'
:
-
1
}
}
evals_result
,
ret
=
template
.
test_template
(
params
,
X_y
,
multi_logloss
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
params
=
params
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
params
=
params
)
evals_result
=
{}
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
50
,
valid_sets
=
lgb_eval
,
verbose_eval
=
False
,
evals_result
=
evals_result
)
ret
=
multi_logloss
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
0.2
)
self
.
assertLess
(
ret
,
0.2
)
self
.
assertAlmostEqual
(
min
(
evals_result
[
'
e
val'
][
'multi_logloss'
]
)
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
evals_result
[
'val
id_0
'
][
'multi_logloss'
]
[
-
1
]
,
ret
,
places
=
5
)
def
test_early_stopping
(
self
):
def
test_early_stopping
(
self
):
X
_
y
=
load_breast_cancer
(
True
)
X
,
y
=
load_breast_cancer
(
True
)
params
=
{
params
=
{
'objective'
:
'binary'
,
'objective'
:
'binary'
,
'metric'
:
'binary_logloss'
,
'metric'
:
'binary_logloss'
,
'verbose'
:
-
1
,
'verbose'
:
-
1
'seed'
:
42
}
}
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
*
X_
y
,
test_size
=
0.1
,
random_state
=
42
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
valid_set_name
=
'valid_set'
valid_set_name
=
'valid_set'
...
@@ -109,7 +113,6 @@ class TestEngine(unittest.TestCase):
...
@@ -109,7 +113,6 @@ class TestEngine(unittest.TestCase):
self
.
assertIn
(
'binary_logloss'
,
gbm
.
best_score
[
valid_set_name
])
self
.
assertIn
(
'binary_logloss'
,
gbm
.
best_score
[
valid_set_name
])
# early stopping occurs
# early stopping occurs
gbm
=
lgb
.
train
(
params
,
lgb_train
,
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
100
,
valid_sets
=
lgb_eval
,
valid_sets
=
lgb_eval
,
valid_names
=
valid_set_name
,
valid_names
=
valid_set_name
,
verbose_eval
=
False
,
verbose_eval
=
False
,
...
@@ -118,85 +121,126 @@ class TestEngine(unittest.TestCase):
...
@@ -118,85 +121,126 @@ class TestEngine(unittest.TestCase):
self
.
assertIn
(
valid_set_name
,
gbm
.
best_score
)
self
.
assertIn
(
valid_set_name
,
gbm
.
best_score
)
self
.
assertIn
(
'binary_logloss'
,
gbm
.
best_score
[
valid_set_name
])
self
.
assertIn
(
'binary_logloss'
,
gbm
.
best_score
[
valid_set_name
])
def
test_continue_train_and_other
(
self
):
def
test_continue_train_and_dump_model
(
self
):
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
params
=
{
'objective'
:
'regression'
,
'objective'
:
'regression'
,
'metric'
:
'l1'
'metric'
:
'l1'
,
'verbose'
:
-
1
}
}
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
free_raw_data
=
False
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
free_raw_data
=
False
)
init_gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
20
)
model_name
=
'model.txt'
model_name
=
'model.txt'
gbm
=
template
.
test_template
(
params
,
num_round
=
20
,
return_model
=
True
,
early_stopping_rounds
=-
1
)
init_gbm
.
save_model
(
model_name
)
gbm
.
save_model
(
model_name
)
evals_result
=
{}
evals_result
,
ret
=
template
.
test_template
(
params
,
feval
=
mean_absolute_error
,
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_round
=
80
,
init_model
=
model_name
,
num_boost_round
=
30
,
custom_eval
=
(
lambda
p
,
d
:
(
'mae'
,
mean_absolute_error
(
p
,
d
.
get_label
()),
False
)))
valid_sets
=
lgb_eval
,
verbose_eval
=
False
,
# test custom eval metrics
feval
=
(
lambda
p
,
d
:
(
'mae'
,
mean_absolute_error
(
p
,
d
.
get_label
()),
False
)),
evals_result
=
evals_result
,
init_model
=
'model.txt'
)
ret
=
mean_absolute_error
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
3.5
)
self
.
assertLess
(
ret
,
3.5
)
self
.
assertAlmostEqual
(
min
(
evals_result
[
'
e
val'
][
'l1'
]
)
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
evals_result
[
'val
id_0
'
][
'l1'
]
[
-
1
]
,
ret
,
places
=
5
)
for
l1
,
mae
in
zip
(
evals_result
[
'
e
val'
][
'l1'
],
evals_result
[
'
e
val'
][
'mae'
]):
for
l1
,
mae
in
zip
(
evals_result
[
'val
id_0
'
][
'l1'
],
evals_result
[
'val
id_0
'
][
'mae'
]):
self
.
assertAlmostEqual
(
l1
,
mae
,
places
=
5
)
self
.
assertAlmostEqual
(
l1
,
mae
,
places
=
5
)
# test dump model
self
.
assertIn
(
'tree_info'
,
gbm
.
dump_model
())
self
.
assertIn
(
'tree_info'
,
gbm
.
dump_model
())
self
.
assertIsInstance
(
gbm
.
feature_importance
(),
np
.
ndarray
)
self
.
assertIsInstance
(
gbm
.
feature_importance
(),
np
.
ndarray
)
os
.
remove
(
model_name
)
os
.
remove
(
model_name
)
def
test_continue_train_multiclass
(
self
):
def
test_continue_train_multiclass
(
self
):
X_y
=
load_iris
(
True
)
X
,
y
=
load_iris
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
params
=
{
'objective'
:
'multiclass'
,
'objective'
:
'multiclass'
,
'metric'
:
'multi_logloss'
,
'metric'
:
'multi_logloss'
,
'num_class'
:
3
'num_class'
:
3
,
'verbose'
:
-
1
}
}
gbm
=
template
.
test_template
(
params
,
X_y
,
num_round
=
20
,
return_model
=
True
,
early_stopping_rounds
=-
1
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
params
=
params
,
free_raw_data
=
False
)
evals_result
,
ret
=
template
.
test_template
(
params
,
X_y
,
feval
=
multi_logloss
,
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
params
=
params
,
free_raw_data
=
False
)
num_round
=
80
,
init_model
=
gbm
)
init_gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
20
)
evals_result
=
{}
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
30
,
valid_sets
=
lgb_eval
,
verbose_eval
=
False
,
evals_result
=
evals_result
,
init_model
=
init_gbm
)
ret
=
multi_logloss
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
1.5
)
self
.
assertLess
(
ret
,
1.5
)
self
.
assertAlmostEqual
(
min
(
evals_result
[
'
e
val'
][
'multi_logloss'
]
)
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
evals_result
[
'val
id_0
'
][
'multi_logloss'
]
[
-
1
]
,
ret
,
places
=
5
)
def
test_cv
(
self
):
def
test_cv
(
self
):
lgb_train
,
_
=
template
.
test_template
(
return_data
=
True
)
X
,
y
=
load_boston
(
True
)
# shuffle = False
X_train
,
_
,
y_train
,
_
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
lgb
.
cv
({
'verbose'
:
-
1
},
lgb_train
,
num_boost_round
=
10
,
nfold
=
3
,
shuffle
=
False
,
params
=
{
'verbose'
:
-
1
}
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
# shuffle = False, override metric in params
params_with_metric
=
{
'metric'
:
'l2'
,
'verbose'
:
-
1
}
lgb
.
cv
(
params_with_metric
,
lgb_train
,
num_boost_round
=
10
,
nfold
=
3
,
shuffle
=
False
,
metrics
=
'l1'
,
verbose_eval
=
False
)
metrics
=
'l1'
,
verbose_eval
=
False
)
# shuffle = True, callbacks
# shuffle = True, callbacks
lgb
.
cv
(
{
'verbose'
:
-
1
}
,
lgb_train
,
num_boost_round
=
10
,
nfold
=
3
,
shuffle
=
True
,
lgb
.
cv
(
params
,
lgb_train
,
num_boost_round
=
10
,
nfold
=
3
,
shuffle
=
True
,
metrics
=
'l1'
,
verbose_eval
=
False
,
metrics
=
'l1'
,
verbose_eval
=
False
,
callbacks
=
[
lgb
.
reset_parameter
(
learning_rate
=
lambda
i
:
0.1
-
0.001
*
i
)])
callbacks
=
[
lgb
.
reset_parameter
(
learning_rate
=
lambda
i
:
0.1
-
0.001
*
i
)])
# self defined data_splitter
# self defined data_splitter
tss
=
TimeSeriesSplit
(
3
)
tss
=
TimeSeriesSplit
(
3
)
lgb
.
cv
(
{
'verbose'
:
-
1
}
,
lgb_train
,
num_boost_round
=
10
,
data_splitter
=
tss
,
nfold
=
5
,
# test if wrong nfold is ignored
lgb
.
cv
(
params
,
lgb_train
,
num_boost_round
=
10
,
data_splitter
=
tss
,
nfold
=
5
,
# test if wrong nfold is ignored
metrics
=
'l2'
,
verbose_eval
=
False
)
metrics
=
'l2'
,
verbose_eval
=
False
)
# lambdarank
# lambdarank
X_train
,
y_train
=
load_svmlight_file
(
'../../examples/lambdarank/rank.train'
)
X_train
,
y_train
=
load_svmlight_file
(
'../../examples/lambdarank/rank.train'
)
q_train
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.train.query'
)
q_train
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.train.query'
)
params
=
{
'objective'
:
'lambdarank'
,
'verbose'
:
-
1
}
params
_lambdarank
=
{
'objective'
:
'lambdarank'
,
'verbose'
:
-
1
}
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
group
=
q_train
,
params
=
params
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
group
=
q_train
)
lgb
.
cv
(
params
,
lgb_train
,
num_boost_round
=
2
0
,
nfold
=
3
,
metrics
=
'l2'
,
verbose_eval
=
False
)
lgb
.
cv
(
params
_lambdarank
,
lgb_train
,
num_boost_round
=
1
0
,
nfold
=
3
,
metrics
=
'l2'
,
verbose_eval
=
False
)
def
test_feature_name
(
self
):
def
test_feature_name
(
self
):
lgb_train
,
_
=
template
.
test_template
(
return_data
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
_
,
y_train
,
_
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
'verbose'
:
-
1
}
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
feature_names
=
[
'f_'
+
str
(
i
)
for
i
in
range
(
13
)]
feature_names
=
[
'f_'
+
str
(
i
)
for
i
in
range
(
13
)]
gbm
=
lgb
.
train
(
{
'verbose'
:
-
1
}
,
lgb_train
,
num_boost_round
=
5
,
feature_name
=
feature_names
)
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
5
,
feature_name
=
feature_names
)
self
.
assertListEqual
(
feature_names
,
gbm
.
feature_name
())
self
.
assertListEqual
(
feature_names
,
gbm
.
feature_name
())
# test feature_names with whitespaces
# test feature_names with whitespaces
feature_names_with_space
=
[
'f '
+
str
(
i
)
for
i
in
range
(
13
)]
feature_names_with_space
=
[
'f '
+
str
(
i
)
for
i
in
range
(
13
)]
gbm
=
lgb
.
train
(
{
'verbose'
:
-
1
}
,
lgb_train
,
num_boost_round
=
5
,
feature_name
=
feature_names_with_space
)
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
5
,
feature_name
=
feature_names_with_space
)
self
.
assertListEqual
(
feature_names
,
gbm
.
feature_name
())
self
.
assertListEqual
(
feature_names
,
gbm
.
feature_name
())
def
test_save_load_copy_pickle
(
self
):
def
test_save_load_copy_pickle
(
self
):
gbm
=
template
.
test_template
(
num_round
=
20
,
return_model
=
True
)
def
test_template
(
init_model
=
None
,
return_model
=
False
):
_
,
ret_origin
=
template
.
test_template
(
init_model
=
gbm
)
X
,
y
=
load_boston
(
True
)
params
=
{
'objective'
:
'regression'
,
'metric'
:
'l2'
,
'verbose'
:
-
1
}
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
gbm_template
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
10
,
init_model
=
init_model
)
return
gbm_template
if
return_model
else
mean_squared_error
(
y_test
,
gbm_template
.
predict
(
X_test
))
gbm
=
test_template
(
return_model
=
True
)
ret_origin
=
test_template
(
init_model
=
gbm
)
other_ret
=
[]
other_ret
=
[]
gbm
.
save_model
(
'lgb.model'
)
gbm
.
save_model
(
'lgb.model'
)
other_ret
.
append
(
template
.
test_template
(
init_model
=
'lgb.model'
)
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
'lgb.model'
))
gbm_load
=
lgb
.
Booster
(
model_file
=
'lgb.model'
)
gbm_load
=
lgb
.
Booster
(
model_file
=
'lgb.model'
)
other_ret
.
append
(
template
.
test_template
(
init_model
=
gbm_load
)
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
gbm_load
))
other_ret
.
append
(
template
.
test_template
(
init_model
=
copy
.
copy
(
gbm
))
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
copy
.
copy
(
gbm
)))
other_ret
.
append
(
template
.
test_template
(
init_model
=
copy
.
deepcopy
(
gbm
))
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
copy
.
deepcopy
(
gbm
)))
with
open
(
'lgb.pkl'
,
'wb'
)
as
f
:
with
open
(
'lgb.pkl'
,
'wb'
)
as
f
:
pickle
.
dump
(
gbm
,
f
)
pickle
.
dump
(
gbm
,
f
)
with
open
(
'lgb.pkl'
,
'rb'
)
as
f
:
with
open
(
'lgb.pkl'
,
'rb'
)
as
f
:
gbm_pickle
=
pickle
.
load
(
f
)
gbm_pickle
=
pickle
.
load
(
f
)
other_ret
.
append
(
template
.
test_template
(
init_model
=
gbm_pickle
)
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
gbm_pickle
))
gbm_pickles
=
pickle
.
loads
(
pickle
.
dumps
(
gbm
))
gbm_pickles
=
pickle
.
loads
(
pickle
.
dumps
(
gbm
))
other_ret
.
append
(
template
.
test_template
(
init_model
=
gbm_pickles
)
[
1
]
)
other_ret
.
append
(
test_template
(
init_model
=
gbm_pickles
))
for
ret
in
other_ret
:
for
ret
in
other_ret
:
self
.
assertAlmostEqual
(
ret_origin
,
ret
,
places
=
5
)
self
.
assertAlmostEqual
(
ret_origin
,
ret
,
places
=
5
)
...
...
tests/python_package_test/test_sklearn.py
View file @
a39141e1
# coding: utf-8
# coding: utf-8
# pylint: skip-file
# pylint: skip-file
import
math
import
unittest
import
unittest
import
lightgbm
as
lgb
import
lightgbm
as
lgb
...
@@ -12,57 +13,52 @@ from sklearn.metrics import log_loss, mean_squared_error
...
@@ -12,57 +13,52 @@ from sklearn.metrics import log_loss, mean_squared_error
from
sklearn.model_selection
import
GridSearchCV
,
train_test_split
from
sklearn.model_selection
import
GridSearchCV
,
train_test_split
class
template
(
object
):
def
multi_error
(
y_true
,
y_pred
):
@
staticmethod
return
np
.
mean
(
y_true
!=
y_pred
)
def
test_template
(
X_y
=
load_boston
(
True
),
model
=
lgb
.
LGBMRegressor
,
feval
=
mean_squared_error
,
num_round
=
100
,
custom_obj
=
None
,
predict_proba
=
False
,
def
multi_logloss
(
y_true
,
y_pred
):
return_data
=
False
,
return_model
=
False
):
return
np
.
mean
([
-
math
.
log
(
y_pred
[
i
][
y
])
for
i
,
y
in
enumerate
(
y_true
)])
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
*
X_y
,
test_size
=
0.1
,
random_state
=
42
)
if
return_data
:
return
X_train
,
X_test
,
y_train
,
y_test
arguments
=
{
'n_estimators'
:
num_round
,
'silent'
:
True
}
if
custom_obj
:
arguments
[
'objective'
]
=
custom_obj
gbm
=
model
(
**
arguments
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
early_stopping_rounds
=
10
,
verbose
=
False
)
if
return_model
:
return
gbm
elif
predict_proba
:
return
feval
(
y_test
,
gbm
.
predict_proba
(
X_test
))
else
:
return
feval
(
y_test
,
gbm
.
predict
(
X_test
))
class
TestSklearn
(
unittest
.
TestCase
):
class
TestSklearn
(
unittest
.
TestCase
):
def
test_binary
(
self
):
def
test_binary
(
self
):
X_y
=
load_breast_cancer
(
True
)
X
,
y
=
load_breast_cancer
(
True
)
ret
=
template
.
test_template
(
X_y
,
lgb
.
LGBMClassifier
,
log_loss
,
predict_proba
=
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMClassifier
(
n_estimators
=
50
,
silent
=
True
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
ret
=
log_loss
(
y_test
,
gbm
.
predict_proba
(
X_test
))
self
.
assertLess
(
ret
,
0.15
)
self
.
assertLess
(
ret
,
0.15
)
self
.
assertAlmostEqual
(
ret
,
gbm
.
evals_result
[
'valid_0'
][
'binary_logloss'
][
-
1
],
places
=
5
)
def
test_regreesion
(
self
):
def
test_regreesion
(
self
):
self
.
assertLess
(
template
.
test_template
()
**
0.5
,
4
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMRegressor
(
n_estimators
=
50
,
silent
=
True
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
ret
=
mean_squared_error
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
16
)
self
.
assertAlmostEqual
(
ret
,
gbm
.
evals_result
[
'valid_0'
][
'l2'
][
-
1
],
places
=
5
)
def
test_multiclass
(
self
):
def
test_multiclass
(
self
):
X
_
y
=
load_digits
(
10
,
True
)
X
,
y
=
load_digits
(
10
,
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
def
multi_error
(
y_true
,
y_pred
):
gbm
=
lgb
.
LGBMClassifier
(
n_estimators
=
50
,
silent
=
True
)
return
np
.
mean
(
y_true
!=
y_pred
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
ret
=
template
.
test_template
(
X_y
,
l
gb
.
LGBMClassifier
,
multi_error
)
ret
=
multi_error
(
y_test
,
gb
m
.
predict
(
X_test
)
)
self
.
assertLess
(
ret
,
0.2
)
self
.
assertLess
(
ret
,
0.2
)
ret
=
multi_logloss
(
y_test
,
gbm
.
predict_proba
(
X_test
))
self
.
assertAlmostEqual
(
ret
,
gbm
.
evals_result
[
'valid_0'
][
'multi_logloss'
][
-
1
],
places
=
5
)
def
test_lambdarank
(
self
):
def
test_lambdarank
(
self
):
X_train
,
y_train
=
load_svmlight_file
(
'../../examples/lambdarank/rank.train'
)
X_train
,
y_train
=
load_svmlight_file
(
'../../examples/lambdarank/rank.train'
)
X_test
,
y_test
=
load_svmlight_file
(
'../../examples/lambdarank/rank.test'
)
X_test
,
y_test
=
load_svmlight_file
(
'../../examples/lambdarank/rank.test'
)
q_train
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.train.query'
)
q_train
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.train.query'
)
q_test
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.test.query'
)
q_test
=
np
.
loadtxt
(
'../../examples/lambdarank/rank.test.query'
)
lgb_model
=
lgb
.
LGBMRanker
().
fit
(
X_train
,
y_train
,
gbm
=
lgb
.
LGBMRanker
()
group
=
q_train
,
gbm
.
fit
(
X_train
,
y_train
,
group
=
q_train
,
eval_set
=
[(
X_test
,
y_test
)],
eval_set
=
[(
X_test
,
y_test
)],
eval_group
=
[
q_test
],
eval_at
=
[
1
,
3
],
verbose
=
False
,
eval_group
=
[
q_test
],
eval_at
=
[
1
],
verbose
=
False
,
callbacks
=
[
lgb
.
reset_parameter
(
learning_rate
=
lambda
x
:
0.95
**
x
*
0.1
)])
callbacks
=
[
lgb
.
reset_parameter
(
learning_rate
=
lambda
x
:
0.95
**
x
*
0.1
)])
def
test_regression_with_custom_objective
(
self
):
def
test_regression_with_custom_objective
(
self
):
...
@@ -70,8 +66,13 @@ class TestSklearn(unittest.TestCase):
...
@@ -70,8 +66,13 @@ class TestSklearn(unittest.TestCase):
grad
=
(
y_pred
-
y_true
)
grad
=
(
y_pred
-
y_true
)
hess
=
np
.
ones
(
len
(
y_true
))
hess
=
np
.
ones
(
len
(
y_true
))
return
grad
,
hess
return
grad
,
hess
ret
=
template
.
test_template
(
custom_obj
=
objective_ls
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMRegressor
(
n_estimators
=
50
,
silent
=
True
,
objective
=
objective_ls
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
ret
=
mean_squared_error
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
100
)
self
.
assertLess
(
ret
,
100
)
self
.
assertAlmostEqual
(
ret
,
gbm
.
evals_result
[
'valid_0'
][
'l2'
][
-
1
],
places
=
5
)
def
test_binary_classification_with_custom_objective
(
self
):
def
test_binary_classification_with_custom_objective
(
self
):
def
logregobj
(
y_true
,
y_pred
):
def
logregobj
(
y_true
,
y_pred
):
...
@@ -79,21 +80,26 @@ class TestSklearn(unittest.TestCase):
...
@@ -79,21 +80,26 @@ class TestSklearn(unittest.TestCase):
grad
=
y_pred
-
y_true
grad
=
y_pred
-
y_true
hess
=
y_pred
*
(
1.0
-
y_pred
)
hess
=
y_pred
*
(
1.0
-
y_pred
)
return
grad
,
hess
return
grad
,
hess
X
_
y
=
load_digits
(
2
,
True
)
X
,
y
=
load_digits
(
2
,
True
)
def
binary_error
(
y_test
,
y_pred
):
def
binary_error
(
y_test
,
y_pred
):
return
np
.
mean
([
int
(
p
>
0.5
)
!=
y
for
y
,
p
in
zip
(
y_test
,
y_pred
)])
return
np
.
mean
([
int
(
p
>
0.5
)
!=
y
for
y
,
p
in
zip
(
y_test
,
y_pred
)])
ret
=
template
.
test_template
(
X_y
,
lgb
.
LGBMClassifier
,
feval
=
binary_error
,
custom_obj
=
logregobj
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMClassifier
(
n_estimators
=
50
,
silent
=
True
,
objective
=
logregobj
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
ret
=
binary_error
(
y_test
,
gbm
.
predict
(
X_test
))
self
.
assertLess
(
ret
,
0.1
)
self
.
assertLess
(
ret
,
0.1
)
def
test_dart
(
self
):
def
test_dart
(
self
):
X_train
,
X_test
,
y_train
,
y_test
=
template
.
test_template
(
return_data
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMRegressor
(
boosting_type
=
'dart'
)
gbm
=
lgb
.
LGBMRegressor
(
boosting_type
=
'dart'
)
gbm
.
fit
(
X_train
,
y_train
)
gbm
.
fit
(
X_train
,
y_train
)
self
.
assertLessEqual
(
gbm
.
score
(
X_train
,
y_train
),
1.
)
self
.
assertLessEqual
(
gbm
.
score
(
X_train
,
y_train
),
1.
)
def
test_grid_search
(
self
):
def
test_grid_search
(
self
):
X_train
,
X_test
,
y_train
,
y_test
=
template
.
test_template
(
return_data
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
params
=
{
'boosting_type'
:
[
'dart'
,
'gbdt'
],
params
=
{
'boosting_type'
:
[
'dart'
,
'gbdt'
],
'n_estimators'
:
[
5
,
8
],
'n_estimators'
:
[
5
,
8
],
'drop_rate'
:
[
0.05
,
0.1
]}
'drop_rate'
:
[
0.05
,
0.1
]}
...
@@ -102,24 +108,38 @@ class TestSklearn(unittest.TestCase):
...
@@ -102,24 +108,38 @@ class TestSklearn(unittest.TestCase):
self
.
assertIn
(
gbm
.
best_params_
[
'n_estimators'
],
[
5
,
8
])
self
.
assertIn
(
gbm
.
best_params_
[
'n_estimators'
],
[
5
,
8
])
def
test_clone_and_property
(
self
):
def
test_clone_and_property
(
self
):
gbm
=
template
.
test_template
(
return_model
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMRegressor
(
n_estimators
=
100
,
silent
=
True
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
early_stopping_rounds
=
10
,
verbose
=
False
)
gbm_clone
=
clone
(
gbm
)
gbm_clone
=
clone
(
gbm
)
self
.
assertIsInstance
(
gbm
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
gbm
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
gbm
.
feature_importances_
,
np
.
ndarray
)
self
.
assertIsInstance
(
gbm
.
feature_importances_
,
np
.
ndarray
)
clf
=
template
.
test_template
(
load_digits
(
2
,
True
),
model
=
lgb
.
LGBMClassifier
,
return_model
=
True
)
X
,
y
=
load_digits
(
2
,
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
clf
=
lgb
.
LGBMClassifier
()
clf
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
early_stopping_rounds
=
10
,
verbose
=
False
)
self
.
assertListEqual
(
sorted
(
clf
.
classes_
),
[
0
,
1
])
self
.
assertListEqual
(
sorted
(
clf
.
classes_
),
[
0
,
1
])
self
.
assertEqual
(
clf
.
n_classes_
,
2
)
self
.
assertEqual
(
clf
.
n_classes_
,
2
)
self
.
assertIsInstance
(
clf
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
clf
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
clf
.
feature_importances_
,
np
.
ndarray
)
self
.
assertIsInstance
(
clf
.
feature_importances_
,
np
.
ndarray
)
def
test_joblib
(
self
):
def
test_joblib
(
self
):
gbm
=
template
.
test_template
(
num_round
=
10
,
return_model
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
=
lgb
.
LGBMRegressor
(
n_estimators
=
100
,
silent
=
True
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
early_stopping_rounds
=
10
,
verbose
=
False
)
joblib
.
dump
(
gbm
,
'lgb.pkl'
)
joblib
.
dump
(
gbm
,
'lgb.pkl'
)
gbm_pickle
=
joblib
.
load
(
'lgb.pkl'
)
gbm_pickle
=
joblib
.
load
(
'lgb.pkl'
)
self
.
assertIsInstance
(
gbm_pickle
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
gbm_pickle
.
booster_
,
lgb
.
Booster
)
self
.
assertDictEqual
(
gbm
.
get_params
(),
gbm_pickle
.
get_params
())
self
.
assertDictEqual
(
gbm
.
get_params
(),
gbm_pickle
.
get_params
())
self
.
assertListEqual
(
list
(
gbm
.
feature_importances_
),
list
(
gbm_pickle
.
feature_importances_
))
self
.
assertListEqual
(
list
(
gbm
.
feature_importances_
),
list
(
gbm_pickle
.
feature_importances_
))
X_train
,
X_test
,
y_train
,
y_test
=
template
.
test_template
(
return_data
=
True
)
X
,
y
=
load_boston
(
True
)
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
42
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
gbm_pickle
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
gbm_pickle
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
for
key
in
gbm
.
evals_result_
:
for
key
in
gbm
.
evals_result_
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment