Unverified commit 51edbda7, authored by James Lamb and committed by GitHub
Browse files

fix feature index in Dataset::AddFeaturesFrom (fixes #5410) (#5650)

parent 42b6322c
...@@ -1495,7 +1495,7 @@ void Dataset::AddFeaturesFrom(Dataset* other) { ...@@ -1495,7 +1495,7 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
other->max_bin_by_feature_, other->num_total_features_, -1); other->max_bin_by_feature_, other->num_total_features_, -1);
num_total_features_ += other->num_total_features_; num_total_features_ += other->num_total_features_;
for (size_t i = 0; i < (other->numeric_feature_map_).size(); ++i) { for (size_t i = 0; i < (other->numeric_feature_map_).size(); ++i) {
int feat_ind = numeric_feature_map_[i]; int feat_ind = other->numeric_feature_map_[i];
if (feat_ind > -1) { if (feat_ind > -1) {
numeric_feature_map_.push_back(feat_ind + num_numeric_features_); numeric_feature_map_.push_back(feat_ind + num_numeric_features_);
} else { } else {
......
...@@ -376,6 +376,29 @@ def test_add_features_from_different_sources(): ...@@ -376,6 +376,29 @@ def test_add_features_from_different_sources():
assert d1.feature_name == res_feature_names assert d1.feature_name == res_feature_names
def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys):
    """Check that add_features_from() works when the receiving Dataset has no informative features.

    The receiving Dataset is built from a single all-zero column, and the test
    asserts that constructing it prints LightGBM's "no meaningful features"
    warning to stdout. Five random columns from a second Dataset are then merged
    in, and the test asserts the resulting feature count, the row count, and that
    the receiving Dataset's handle value is unchanged by the merge.
    """
    # Input matrices: one constant column, and five normally distributed columns.
    constant_column = np.zeros((100, 1), dtype=np.float32)
    random_columns = np.random.normal(size=(100, 5))

    # Constructing from the constant column is expected to log this warning.
    dataset_a = lgb.Dataset(constant_column).construct()
    no_feature_warning = (
        '[LightGBM] [Warning] There are no meaningful features which satisfy '
        'the provided configuration. Decreasing Dataset parameters min_data_in_bin '
        'or min_data_in_leaf and re-constructing Dataset might resolve this warning.\n'
    )
    captured_stdout = capsys.readouterr().out
    assert no_feature_warning in captured_stdout

    dataset_b = lgb.Dataset(random_columns).construct()

    # Remember the handle value so we can verify the merge reuses it.
    handle_before_merge = dataset_a.handle.value
    dataset_a.add_features_from(dataset_b)

    # 1 original column + 5 merged columns; row count is untouched.
    assert dataset_a.num_feature() == 6
    assert dataset_a.num_data() == 100
    assert dataset_a.handle.value == handle_before_merge
def test_cegb_affects_behavior(tmp_path): def test_cegb_affects_behavior(tmp_path):
X = np.random.random((100, 5)) X = np.random.random((100, 5))
X[:, [1, 3]] = 0 X[:, [1, 3]] = 0
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment