Unverified commit 45ac271b, authored by Nikita Titov and committed by GitHub
Browse files

[python] replace numpy.zeros with numpy.empty for the speedup (#4410)

parent db3915c2
......@@ -780,7 +780,7 @@ class _InnerPredictor:
ptr_data, type_ptr_data, _ = c_float_array(data)
n_preds = self.__get_num_preds(start_iteration, num_iteration, mat.shape[0], predict_type)
if preds is None:
preds = np.zeros(n_preds, dtype=np.float64)
preds = np.empty(n_preds, dtype=np.float64)
elif len(preds.shape) != 1 or len(preds) != n_preds:
raise ValueError("Wrong length of pre-allocated predict array")
out_num_preds = ctypes.c_int64(0)
......@@ -807,7 +807,7 @@ class _InnerPredictor:
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
preds = np.zeros(sum(n_preds), dtype=np.float64)
preds = np.empty(sum(n_preds), dtype=np.float64)
for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections),
zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations
......@@ -868,7 +868,7 @@ class _InnerPredictor:
nrow = len(csr.indptr) - 1
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
if preds is None:
preds = np.zeros(n_preds, dtype=np.float64)
preds = np.empty(n_preds, dtype=np.float64)
elif len(preds.shape) != 1 or len(preds) != n_preds:
raise ValueError("Wrong length of pre-allocated predict array")
out_num_preds = ctypes.c_int64(0)
......@@ -913,7 +913,7 @@ class _InnerPredictor:
out_ptr_data = ctypes.POINTER(ctypes.c_float)()
else:
out_ptr_data = ctypes.POINTER(ctypes.c_double)()
out_shape = np.zeros(2, dtype=np.int64)
out_shape = np.empty(2, dtype=np.int64)
_safe_call(_LIB.LGBM_BoosterPredictSparseOutput(
self.handle,
ptr_indptr,
......@@ -946,7 +946,7 @@ class _InnerPredictor:
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
preds = np.zeros(sum(n_preds), dtype=np.float64)
preds = np.empty(sum(n_preds), dtype=np.float64)
for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]),
zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations
......@@ -971,7 +971,7 @@ class _InnerPredictor:
out_ptr_data = ctypes.POINTER(ctypes.c_float)()
else:
out_ptr_data = ctypes.POINTER(ctypes.c_double)()
out_shape = np.zeros(2, dtype=np.int64)
out_shape = np.empty(2, dtype=np.int64)
_safe_call(_LIB.LGBM_BoosterPredictSparseOutput(
self.handle,
ptr_indptr,
......@@ -1002,7 +1002,7 @@ class _InnerPredictor:
if predict_type == C_API_PREDICT_CONTRIB:
return inner_predict_sparse(csc, start_iteration, num_iteration, predict_type)
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
preds = np.zeros(n_preds, dtype=np.float64)
preds = np.empty(n_preds, dtype=np.float64)
out_num_preds = ctypes.c_int64(0)
ptr_indptr, type_ptr_indptr, __ = c_int_array(csc.indptr)
......@@ -1176,7 +1176,7 @@ class Dataset:
if used_indices is not None:
assert not self.need_slice
if isinstance(data, str):
sub_init_score = np.zeros(num_data * predictor.num_class, dtype=np.float32)
sub_init_score = np.empty(num_data * predictor.num_class, dtype=np.float32)
assert num_data == len(used_indices)
for i in range(len(used_indices)):
for j in range(predictor.num_class):
......@@ -1184,7 +1184,7 @@ class Dataset:
init_score = sub_init_score
if predictor.num_class > 1:
# need to regroup init_score
new_init_score = np.zeros(init_score.size, dtype=np.float32)
new_init_score = np.empty(init_score.size, dtype=np.float32)
for i in range(num_data):
for j in range(predictor.num_class):
new_init_score[j * num_data + i] = init_score[i * predictor.num_class + j]
......@@ -1320,7 +1320,7 @@ class Dataset:
def __init_from_list_np2d(self, mats, params_str, ref_dataset):
"""Initialize data from a list of 2-D numpy matrices."""
ncol = mats[0].shape[1]
nrow = np.zeros((len(mats),), np.int32)
nrow = np.empty((len(mats),), np.int32)
if mats[0].dtype == np.float64:
ptr_data = (ctypes.POINTER(ctypes.c_double) * len(mats))()
else:
......@@ -3310,7 +3310,7 @@ class Booster:
if iteration is None:
iteration = self.best_iteration
importance_type_int = FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
result = np.zeros(self.num_feature(), dtype=np.float64)
result = np.empty(self.num_feature(), dtype=np.float64)
_safe_call(_LIB.LGBM_BoosterFeatureImportance(
self.handle,
ctypes.c_int(iteration),
......@@ -3397,7 +3397,7 @@ class Booster:
self.__get_eval_info()
ret = []
if self.__num_inner_eval > 0:
result = np.zeros(self.__num_inner_eval, dtype=np.float64)
result = np.empty(self.__num_inner_eval, dtype=np.float64)
tmp_out_len = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetEval(
self.handle,
......@@ -3437,7 +3437,7 @@ class Booster:
n_preds = self.train_set.num_data() * self.__num_class
else:
n_preds = self.valid_sets[data_idx - 1].num_data() * self.__num_class
self.__inner_predict_buffer[data_idx] = np.zeros(n_preds, dtype=np.float64)
self.__inner_predict_buffer[data_idx] = np.empty(n_preds, dtype=np.float64)
# avoid to predict many time in one iteration
if not self.__is_predicted_cur_iter[data_idx]:
tmp_out_len = ctypes.c_int64(0)
......
......@@ -333,7 +333,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
else:
flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
folds = folds.split(X=np.empty(num_data), y=full_data.get_label(), groups=flatted_group)
else:
if any(params.get(obj_alias, "") in {"lambdarank", "rank_xendcg", "xendcg",
"xe_ndcg", "xe_ndcg_mart", "xendcg_mart"}
......@@ -344,12 +344,12 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
group_kfold = _LGBMGroupKFold(n_splits=nfold)
folds = group_kfold.split(X=np.zeros(num_data), groups=flatted_group)
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
elif stratified:
if not SKLEARN_INSTALLED:
raise LightGBMError('scikit-learn is required for stratified cv')
skf = _LGBMStratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
folds = skf.split(X=np.zeros(num_data), y=full_data.get_label())
folds = skf.split(X=np.empty(num_data), y=full_data.get_label())
else:
if shuffle:
randidx = np.random.RandomState(seed).permutation(num_data)
......
......@@ -268,7 +268,7 @@ def test_booster():
for line in inp.readlines():
data.append([float(x) for x in line.split('\t')[1:]])
mat = np.array(data, dtype=np.float64)
preb = np.zeros(mat.shape[0], dtype=np.float64)
preb = np.empty(mat.shape[0], dtype=np.float64)
num_preb = ctypes.c_int64(0)
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
LIB.LGBM_BoosterPredictForMat(
......
......@@ -1441,9 +1441,8 @@ def test_max_bin_by_feature():
def test_small_max_bin():
np.random.seed(0)
y = np.random.choice([0, 1], 100)
x = np.zeros((100, 1))
x = np.ones((100, 1))
x[:30, 0] = -1
x[30:60, 0] = 1
x[60:, 0] = 2
params = {'objective': 'binary',
'seed': 0,
......@@ -2259,7 +2258,7 @@ def test_node_level_subcol():
def test_forced_bins():
x = np.zeros((100, 2))
x = np.empty((100, 2))
x[:, 0] = np.arange(0, 1, 0.01)
x[:, 1] = -np.arange(0, 1, 0.01)
y = np.arange(0, 1, 0.01)
......@@ -2275,7 +2274,6 @@ def test_forced_bins():
est = lgb.train(params, lgb_x, num_boost_round=20)
new_x = np.zeros((3, x.shape[1]))
new_x[:, 0] = [0.31, 0.37, 0.41]
new_x[:, 1] = [0, 0, 0]
predicted = est.predict(new_x)
assert len(np.unique(predicted)) == 3
new_x[:, 0] = [0, 0, 0]
......@@ -2300,7 +2298,7 @@ def test_forced_bins():
def test_binning_same_sign():
# test that binning works properly for features with only positive or only negative values
x = np.zeros((99, 2))
x = np.empty((99, 2))
x[:, 0] = np.arange(0.01, 1, 0.01)
x[:, 1] = -np.arange(0.01, 1, 0.01)
y = np.arange(0.01, 1, 0.01)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment