Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
60b0155a
Unverified
Commit
60b0155a
authored
Dec 23, 2024
by
RektPunk
Committed by
GitHub
Dec 22, 2024
Browse files
[python-package] Fix inconsistency in `predict()` output shape for 1-tree models (#6753)
parent
4ee0bc05
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
2 deletions
+90
-2
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+1
-1
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+89
-1
No files found.
python-package/lightgbm/basic.py
View file @
60b0155a
...
@@ -1248,7 +1248,7 @@ class _InnerPredictor:
...
@@ -1248,7 +1248,7 @@ class _InnerPredictor:
if
pred_leaf
:
if
pred_leaf
:
preds
=
preds
.
astype
(
np
.
int32
)
preds
=
preds
.
astype
(
np
.
int32
)
is_sparse
=
isinstance
(
preds
,
(
list
,
scipy
.
sparse
.
spmatrix
))
is_sparse
=
isinstance
(
preds
,
(
list
,
scipy
.
sparse
.
spmatrix
))
if
not
is_sparse
and
preds
.
size
!=
nrow
:
if
not
is_sparse
and
(
preds
.
size
!=
nrow
or
pred_leaf
or
pred_contrib
)
:
if
preds
.
size
%
nrow
==
0
:
if
preds
.
size
%
nrow
==
0
:
preds
=
preds
.
reshape
(
nrow
,
-
1
)
preds
=
preds
.
reshape
(
nrow
,
-
1
)
else
:
else
:
...
...
tests/python_package_test/test_engine.py
View file @
60b0155a
...
@@ -15,7 +15,7 @@ import numpy as np
...
@@ -15,7 +15,7 @@ import numpy as np
import
psutil
import
psutil
import
pytest
import
pytest
from
scipy.sparse
import
csr_matrix
,
isspmatrix_csc
,
isspmatrix_csr
from
scipy.sparse
import
csr_matrix
,
isspmatrix_csc
,
isspmatrix_csr
from
sklearn.datasets
import
load_svmlight_file
,
make_blobs
,
make_multilabel_classification
from
sklearn.datasets
import
load_svmlight_file
,
make_blobs
,
make_classification
,
make_multilabel_classification
from
sklearn.metrics
import
average_precision_score
,
log_loss
,
mean_absolute_error
,
mean_squared_error
,
roc_auc_score
from
sklearn.metrics
import
average_precision_score
,
log_loss
,
mean_absolute_error
,
mean_squared_error
,
roc_auc_score
from
sklearn.model_selection
import
GroupKFold
,
TimeSeriesSplit
,
train_test_split
from
sklearn.model_selection
import
GroupKFold
,
TimeSeriesSplit
,
train_test_split
...
@@ -2314,6 +2314,33 @@ def test_refit():
...
@@ -2314,6 +2314,33 @@ def test_refit():
assert
err_pred
>
new_err_pred
assert
err_pred
>
new_err_pred
def test_refit_with_one_tree_regression():
    """Refitting a regression Booster trained for a single round yields a Booster."""
    features, target = make_synthetic_regression(n_samples=1_000, n_features=2)
    train_set = lgb.Dataset(features, label=target)
    booster = lgb.train(
        {"objective": "regression", "verbosity": -1},
        train_set,
        num_boost_round=1,
    )
    refitted = booster.refit(features, target)
    assert isinstance(refitted, lgb.Booster)
def test_refit_with_one_tree_binary_classification():
    """Refitting a binary-classification Booster trained for a single round yields a Booster."""
    features, target = load_breast_cancer(return_X_y=True)
    train_set = lgb.Dataset(features, label=target)
    booster = lgb.train(
        {"objective": "binary", "verbosity": -1},
        train_set,
        num_boost_round=1,
    )
    refitted = booster.refit(features, target)
    assert isinstance(refitted, lgb.Booster)
def test_refit_with_one_tree_multiclass_classification():
    """Refitting a multiclass Booster trained for a single round yields a Booster."""
    X, y = load_iris(return_X_y=True)
    # label=y (keyword) for consistency with the sibling one-tree refit tests.
    lgb_train = lgb.Dataset(X, label=y)
    # Use the canonical "verbosity" parameter (the original used its alias
    # "verbose") so all three one-tree refit tests share the same params style.
    params = {"objective": "multiclass", "num_class": 3, "verbosity": -1}
    model = lgb.train(params, lgb_train, num_boost_round=1)
    model_refit = model.refit(X, y)
    assert isinstance(model_refit, lgb.Booster)
def
test_refit_dataset_params
(
rng
):
def
test_refit_dataset_params
(
rng
):
# check refit accepts dataset_params
# check refit accepts dataset_params
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
...
@@ -3872,6 +3899,67 @@ def test_predict_stump(rng, use_init_score):
...
@@ -3872,6 +3899,67 @@ def test_predict_stump(rng, use_init_score):
np
.
testing
.
assert_allclose
(
preds_all
,
np
.
full_like
(
preds_all
,
fill_value
=
y_avg
))
np
.
testing
.
assert_allclose
(
preds_all
,
np
.
full_like
(
preds_all
,
fill_value
=
y_avg
))
def test_predict_regression_output_shape():
    """Check predict() output shapes for regression models with 1 and 2 trees."""
    n_samples = 1_000
    n_features = 4
    X, y = make_synthetic_regression(n_samples=n_samples, n_features=n_features)
    dtrain = lgb.Dataset(X, label=y)
    params = {"objective": "regression", "verbosity": -1}

    # same shape expectations must hold for a 1-round and a 2-round model,
    # except pred_leaf, which has one column per tree
    for num_trees in (1, 2):
        bst = lgb.train(params, dtrain, num_boost_round=num_trees)
        assert bst.predict(X).shape == (n_samples,)
        assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1)
        assert bst.predict(X, pred_leaf=True).shape == (n_samples, num_trees)
def test_predict_binary_classification_output_shape():
    """Check predict() output shapes for binary models with 1 and 2 trees."""
    n_samples = 1_000
    n_features = 4
    X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=2)
    dtrain = lgb.Dataset(X, label=y)
    params = {"objective": "binary", "verbosity": -1}

    # same shape expectations must hold for a 1-round and a 2-round model,
    # except pred_leaf, which has one column per tree
    for num_trees in (1, 2):
        bst = lgb.train(params, dtrain, num_boost_round=num_trees)
        assert bst.predict(X).shape == (n_samples,)
        assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1)
        assert bst.predict(X, pred_leaf=True).shape == (n_samples, num_trees)
def test_predict_multiclass_classification_output_shape():
    """Check predict() output shapes for multiclass models with 1 and 2 trees."""
    n_samples = 1_000
    n_features = 10
    n_classes = 3
    X, y = make_classification(
        n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=6
    )
    dtrain = lgb.Dataset(X, label=y)
    params = {"objective": "multiclass", "verbosity": -1, "num_class": n_classes}

    # same shape expectations must hold for a 1-round and a 2-round model,
    # except pred_leaf, which has num_class columns per boosting round
    for num_trees in (1, 2):
        bst = lgb.train(params, dtrain, num_boost_round=num_trees)
        assert bst.predict(X).shape == (n_samples, n_classes)
        assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_classes * (n_features + 1))
        assert bst.predict(X, pred_leaf=True).shape == (n_samples, n_classes * num_trees)
def
test_average_precision_metric
():
def
test_average_precision_metric
():
# test against sklearn average precision metric
# test against sklearn average precision metric
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment