Commit 465d1262 (unverified), authored by Guolin Ke, committed by GitHub
Browse files

check sorted indices in Subset (#2510)

* Update sparse_bin.hpp

* check sorted in c_api

* fix python package

* fix tests

* fix test

* std::is_sorted

* Update basic.py
parent 198d0f3b
...@@ -1095,7 +1095,7 @@ class Dataset(object): ...@@ -1095,7 +1095,7 @@ class Dataset(object):
free_raw_data=self.free_raw_data) free_raw_data=self.free_raw_data)
ret._predictor = self._predictor ret._predictor = self._predictor
ret.pandas_categorical = self.pandas_categorical ret.pandas_categorical = self.pandas_categorical
ret.used_indices = used_indices ret.used_indices = sorted(used_indices)
return ret return ret
def save_binary(self, filename): def save_binary(self, filename):
......
...@@ -339,8 +339,8 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -339,8 +339,8 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
ret = _CVBooster() ret = _CVBooster()
for train_idx, test_idx in folds: for train_idx, test_idx in folds:
train_set = full_data.subset(train_idx) train_set = full_data.subset(sorted(train_idx))
valid_set = full_data.subset(test_idx) valid_set = full_data.subset(sorted(test_idx))
# run preprocessing on the data set if needed # run preprocessing on the data set if needed
if fpreproc is not None: if fpreproc is not None:
train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy()) train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy())
......
...@@ -921,6 +921,9 @@ int LGBM_DatasetGetSubset( ...@@ -921,6 +921,9 @@ int LGBM_DatasetGetSubset(
const int32_t lower = 0; const int32_t lower = 0;
const int32_t upper = full_dataset->num_data() - 1; const int32_t upper = full_dataset->num_data() - 1;
Common::CheckElementsIntervalClosed(used_row_indices, lower, upper, num_used_row_indices, "Used indices of subset"); Common::CheckElementsIntervalClosed(used_row_indices, lower, upper, num_used_row_indices, "Used indices of subset");
if (!std::is_sorted(used_row_indices, used_row_indices + num_used_row_indices)) {
Log::Fatal("used_row_indices should be sorted in Subset");
}
auto ret = std::unique_ptr<Dataset>(new Dataset(num_used_row_indices)); auto ret = std::unique_ptr<Dataset>(new Dataset(num_used_row_indices));
ret->CopyFeatureMapperFrom(full_dataset); ret->CopyFeatureMapperFrom(full_dataset);
ret->CopySubset(full_dataset, used_row_indices, num_used_row_indices, true); ret->CopySubset(full_dataset, used_row_indices, num_used_row_indices, true);
......
...@@ -820,9 +820,9 @@ class TestEngine(unittest.TestCase): ...@@ -820,9 +820,9 @@ class TestEngine(unittest.TestCase):
data = np.random.random((500, 2)) data = np.random.random((500, 2))
y = [1] * 250 + [0] * 250 y = [1] * 250 + [0] * 250
lgb_train = lgb.Dataset(data, y, free_raw_data=False) lgb_train = lgb.Dataset(data, y, free_raw_data=False)
subset_index_1 = np.random.choice(np.arange(500), 300, replace=False) subset_index_1 = sorted(np.random.choice(np.arange(500), 300, replace=False))
subset_data_1 = lgb_train.subset(subset_index_1) subset_data_1 = lgb_train.subset(subset_index_1)
subset_index_2 = np.random.choice(np.arange(500), 200, replace=False) subset_index_2 = sorted(np.random.choice(np.arange(500), 200, replace=False))
subset_data_2 = lgb_train.subset(subset_index_2) subset_data_2 = lgb_train.subset(subset_index_2)
params = { params = {
'objective': 'binary', 'objective': 'binary',
...@@ -1601,8 +1601,8 @@ class TestEngine(unittest.TestCase): ...@@ -1601,8 +1601,8 @@ class TestEngine(unittest.TestCase):
iter_min = min([iter_min_l1, iter_min_l2]) iter_min = min([iter_min_l1, iter_min_l2])
iter_min_valid1 = min([iter_valid1_l1, iter_valid1_l2]) iter_min_valid1 = min([iter_valid1_l1, iter_valid1_l2])
iter_cv_l1 = 3 iter_cv_l1 = 4
iter_cv_l2 = 17 iter_cv_l2 = 12
self.assertEqual(len(set([iter_cv_l1, iter_cv_l2])), 2) self.assertEqual(len(set([iter_cv_l1, iter_cv_l2])), 2)
iter_cv_min = min([iter_cv_l1, iter_cv_l2]) iter_cv_min = min([iter_cv_l1, iter_cv_l2])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment