Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
45ac271b
Unverified
Commit
45ac271b
authored
Jun 27, 2021
by
Nikita Titov
Committed by
GitHub
Jun 27, 2021
Browse files
[python] replace numpy.zeros with numpy.empty for the speedup (#4410)
parent
db3915c2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
22 deletions
+20
-22
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+13
-13
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+3
-3
tests/c_api_test/test_.py
tests/c_api_test/test_.py
+1
-1
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+3
-5
No files found.
python-package/lightgbm/basic.py
View file @
45ac271b
...
...
@@ -780,7 +780,7 @@ class _InnerPredictor:
ptr_data
,
type_ptr_data
,
_
=
c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
if
preds
is
None
:
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
...
...
@@ -807,7 +807,7 @@ class _InnerPredictor:
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds
=
[
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
i
,
predict_type
)
for
i
in
np
.
diff
([
0
]
+
list
(
sections
)
+
[
nrow
])]
n_preds_sections
=
np
.
array
([
0
]
+
n_preds
,
dtype
=
np
.
intp
).
cumsum
()
preds
=
np
.
zeros
(
sum
(
n_preds
),
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
sum
(
n_preds
),
dtype
=
np
.
float64
)
for
chunk
,
(
start_idx_pred
,
end_idx_pred
)
in
zip
(
np
.
array_split
(
mat
,
sections
),
zip
(
n_preds_sections
,
n_preds_sections
[
1
:])):
# avoid memory consumption by arrays concatenation operations
...
...
@@ -868,7 +868,7 @@ class _InnerPredictor:
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
if
preds
is
None
:
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
...
...
@@ -913,7 +913,7 @@ class _InnerPredictor:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
zeros
(
2
,
dtype
=
np
.
int64
)
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
...
...
@@ -946,7 +946,7 @@ class _InnerPredictor:
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds
=
[
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
i
,
predict_type
)
for
i
in
np
.
diff
(
sections
)]
n_preds_sections
=
np
.
array
([
0
]
+
n_preds
,
dtype
=
np
.
intp
).
cumsum
()
preds
=
np
.
zeros
(
sum
(
n_preds
),
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
sum
(
n_preds
),
dtype
=
np
.
float64
)
for
(
start_idx
,
end_idx
),
(
start_idx_pred
,
end_idx_pred
)
in
zip
(
zip
(
sections
,
sections
[
1
:]),
zip
(
n_preds_sections
,
n_preds_sections
[
1
:])):
# avoid memory consumption by arrays concatenation operations
...
...
@@ -971,7 +971,7 @@ class _InnerPredictor:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
zeros
(
2
,
dtype
=
np
.
int64
)
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
...
...
@@ -1002,7 +1002,7 @@ class _InnerPredictor:
if
predict_type
==
C_API_PREDICT_CONTRIB
:
return
inner_predict_sparse
(
csc
,
start_iteration
,
num_iteration
,
predict_type
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
,
__
=
c_int_array
(
csc
.
indptr
)
...
...
@@ -1176,7 +1176,7 @@ class Dataset:
if
used_indices
is
not
None
:
assert
not
self
.
need_slice
if
isinstance
(
data
,
str
):
sub_init_score
=
np
.
zeros
(
num_data
*
predictor
.
num_class
,
dtype
=
np
.
float32
)
sub_init_score
=
np
.
empty
(
num_data
*
predictor
.
num_class
,
dtype
=
np
.
float32
)
assert
num_data
==
len
(
used_indices
)
for
i
in
range
(
len
(
used_indices
)):
for
j
in
range
(
predictor
.
num_class
):
...
...
@@ -1184,7 +1184,7 @@ class Dataset:
init_score
=
sub_init_score
if
predictor
.
num_class
>
1
:
# need to regroup init_score
new_init_score
=
np
.
zeros
(
init_score
.
size
,
dtype
=
np
.
float32
)
new_init_score
=
np
.
empty
(
init_score
.
size
,
dtype
=
np
.
float32
)
for
i
in
range
(
num_data
):
for
j
in
range
(
predictor
.
num_class
):
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
predictor
.
num_class
+
j
]
...
...
@@ -1320,7 +1320,7 @@ class Dataset:
def
__init_from_list_np2d
(
self
,
mats
,
params_str
,
ref_dataset
):
"""Initialize data from a list of 2-D numpy matrices."""
ncol
=
mats
[
0
].
shape
[
1
]
nrow
=
np
.
zeros
((
len
(
mats
),),
np
.
int32
)
nrow
=
np
.
empty
((
len
(
mats
),),
np
.
int32
)
if
mats
[
0
].
dtype
==
np
.
float64
:
ptr_data
=
(
ctypes
.
POINTER
(
ctypes
.
c_double
)
*
len
(
mats
))()
else
:
...
...
@@ -3310,7 +3310,7 @@ class Booster:
if
iteration
is
None
:
iteration
=
self
.
best_iteration
importance_type_int
=
FEATURE_IMPORTANCE_TYPE_MAPPER
[
importance_type
]
result
=
np
.
zeros
(
self
.
num_feature
(),
dtype
=
np
.
float64
)
result
=
np
.
empty
(
self
.
num_feature
(),
dtype
=
np
.
float64
)
_safe_call
(
_LIB
.
LGBM_BoosterFeatureImportance
(
self
.
handle
,
ctypes
.
c_int
(
iteration
),
...
...
@@ -3397,7 +3397,7 @@ class Booster:
self
.
__get_eval_info
()
ret
=
[]
if
self
.
__num_inner_eval
>
0
:
result
=
np
.
zeros
(
self
.
__num_inner_eval
,
dtype
=
np
.
float64
)
result
=
np
.
empty
(
self
.
__num_inner_eval
,
dtype
=
np
.
float64
)
tmp_out_len
=
ctypes
.
c_int
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
self
.
handle
,
...
...
@@ -3437,7 +3437,7 @@ class Booster:
n_preds
=
self
.
train_set
.
num_data
()
*
self
.
__num_class
else
:
n_preds
=
self
.
valid_sets
[
data_idx
-
1
].
num_data
()
*
self
.
__num_class
self
.
__inner_predict_buffer
[
data_idx
]
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float64
)
self
.
__inner_predict_buffer
[
data_idx
]
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
# avoid to predict many time in one iteration
if
not
self
.
__is_predicted_cur_iter
[
data_idx
]:
tmp_out_len
=
ctypes
.
c_int64
(
0
)
...
...
python-package/lightgbm/engine.py
View file @
45ac271b
...
...
@@ -333,7 +333,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group
=
np
.
repeat
(
range
(
len
(
group_info
)),
repeats
=
group_info
)
else
:
flatted_group
=
np
.
zeros
(
num_data
,
dtype
=
np
.
int32
)
folds
=
folds
.
split
(
X
=
np
.
zeros
(
num_data
),
y
=
full_data
.
get_label
(),
groups
=
flatted_group
)
folds
=
folds
.
split
(
X
=
np
.
empty
(
num_data
),
y
=
full_data
.
get_label
(),
groups
=
flatted_group
)
else
:
if
any
(
params
.
get
(
obj_alias
,
""
)
in
{
"lambdarank"
,
"rank_xendcg"
,
"xendcg"
,
"xe_ndcg"
,
"xe_ndcg_mart"
,
"xendcg_mart"
}
...
...
@@ -344,12 +344,12 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
group_info
=
np
.
array
(
full_data
.
get_group
(),
dtype
=
np
.
int32
,
copy
=
False
)
flatted_group
=
np
.
repeat
(
range
(
len
(
group_info
)),
repeats
=
group_info
)
group_kfold
=
_LGBMGroupKFold
(
n_splits
=
nfold
)
folds
=
group_kfold
.
split
(
X
=
np
.
zeros
(
num_data
),
groups
=
flatted_group
)
folds
=
group_kfold
.
split
(
X
=
np
.
empty
(
num_data
),
groups
=
flatted_group
)
elif
stratified
:
if
not
SKLEARN_INSTALLED
:
raise
LightGBMError
(
'scikit-learn is required for stratified cv'
)
skf
=
_LGBMStratifiedKFold
(
n_splits
=
nfold
,
shuffle
=
shuffle
,
random_state
=
seed
)
folds
=
skf
.
split
(
X
=
np
.
zeros
(
num_data
),
y
=
full_data
.
get_label
())
folds
=
skf
.
split
(
X
=
np
.
empty
(
num_data
),
y
=
full_data
.
get_label
())
else
:
if
shuffle
:
randidx
=
np
.
random
.
RandomState
(
seed
).
permutation
(
num_data
)
...
...
tests/c_api_test/test_.py
View file @
45ac271b
...
...
@@ -268,7 +268,7 @@ def test_booster():
for
line
in
inp
.
readlines
():
data
.
append
([
float
(
x
)
for
x
in
line
.
split
(
'
\t
'
)[
1
:]])
mat
=
np
.
array
(
data
,
dtype
=
np
.
float64
)
preb
=
np
.
zeros
(
mat
.
shape
[
0
],
dtype
=
np
.
float64
)
preb
=
np
.
empty
(
mat
.
shape
[
0
],
dtype
=
np
.
float64
)
num_preb
=
ctypes
.
c_int64
(
0
)
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float64
,
copy
=
False
)
LIB
.
LGBM_BoosterPredictForMat
(
...
...
tests/python_package_test/test_engine.py
View file @
45ac271b
...
...
@@ -1441,9 +1441,8 @@ def test_max_bin_by_feature():
def
test_small_max_bin
():
np
.
random
.
seed
(
0
)
y
=
np
.
random
.
choice
([
0
,
1
],
100
)
x
=
np
.
zero
s
((
100
,
1
))
x
=
np
.
one
s
((
100
,
1
))
x
[:
30
,
0
]
=
-
1
x
[
30
:
60
,
0
]
=
1
x
[
60
:,
0
]
=
2
params
=
{
'objective'
:
'binary'
,
'seed'
:
0
,
...
...
@@ -2259,7 +2258,7 @@ def test_node_level_subcol():
def
test_forced_bins
():
x
=
np
.
zeros
((
100
,
2
))
x
=
np
.
empty
((
100
,
2
))
x
[:,
0
]
=
np
.
arange
(
0
,
1
,
0.01
)
x
[:,
1
]
=
-
np
.
arange
(
0
,
1
,
0.01
)
y
=
np
.
arange
(
0
,
1
,
0.01
)
...
...
@@ -2275,7 +2274,6 @@ def test_forced_bins():
est
=
lgb
.
train
(
params
,
lgb_x
,
num_boost_round
=
20
)
new_x
=
np
.
zeros
((
3
,
x
.
shape
[
1
]))
new_x
[:,
0
]
=
[
0.31
,
0.37
,
0.41
]
new_x
[:,
1
]
=
[
0
,
0
,
0
]
predicted
=
est
.
predict
(
new_x
)
assert
len
(
np
.
unique
(
predicted
))
==
3
new_x
[:,
0
]
=
[
0
,
0
,
0
]
...
...
@@ -2300,7 +2298,7 @@ def test_forced_bins():
def
test_binning_same_sign
():
# test that binning works properly for features with only positive or only negative values
x
=
np
.
zeros
((
99
,
2
))
x
=
np
.
empty
((
99
,
2
))
x
[:,
0
]
=
np
.
arange
(
0.01
,
1
,
0.01
)
x
[:,
1
]
=
-
np
.
arange
(
0.01
,
1
,
0.01
)
y
=
np
.
arange
(
0.01
,
1
,
0.01
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment