Unverified commit 6de9bafa, authored by Nikita Titov, committed by GitHub
Browse files

Fix potential overflow "Multiplication result converted to larger type" (#5189)



* Update dataset_loader.cpp

* Update gbdt.h

* Update regression_objective.hpp

* Update linker_topo.cpp

* Update xentropy_objective.hpp

* Update regression_objective.hpp

* investigate inf test failure

* avoid overflow in regression objective

* remove `test_inf_handle` test
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
parent 14ca8fc0
...@@ -193,7 +193,7 @@ class GBDT : public GBDTBase { ...@@ -193,7 +193,7 @@ class GBDT : public GBDTBase {
if (data_idx > 0) { if (data_idx > 0) {
num_data = valid_score_updater_[data_idx - 1]->num_data(); num_data = valid_score_updater_[data_idx - 1]->num_data();
} }
return num_data * num_class_; return static_cast<int64_t>(num_data) * num_class_;
} }
/*! /*!
......
...@@ -1257,7 +1257,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat ...@@ -1257,7 +1257,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
} else { } else {
OMP_INIT_EX(); OMP_INIT_EX();
// if need to prediction with initial model // if need to prediction with initial model
std::vector<double> init_score(dataset->num_data_ * num_class_); std::vector<double> init_score(static_cast<size_t>(dataset->num_data_) * num_class_);
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row) #pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
for (data_size_t i = 0; i < dataset->num_data_; ++i) { for (data_size_t i = 0; i < dataset->num_data_; ++i) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
...@@ -1324,7 +1324,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* ...@@ -1324,7 +1324,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
const std::vector<data_size_t>& used_data_indices, Dataset* dataset) { const std::vector<data_size_t>& used_data_indices, Dataset* dataset) {
std::vector<double> init_score; std::vector<double> init_score;
if (predict_fun_) { if (predict_fun_) {
init_score = std::vector<double>(dataset->num_data_ * num_class_); init_score = std::vector<double>(static_cast<size_t>(dataset->num_data_) * num_class_);
} }
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun = std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &init_score, &parser, &dataset] [this, &init_score, &parser, &dataset]
......
...@@ -155,7 +155,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) { ...@@ -155,7 +155,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
rec_map.ranks[i] = next_node_idx; rec_map.ranks[i] = next_node_idx;
// get receive block information // get receive block information
const int recv_block_start = cur_group_idx / distance[i]; const int recv_block_start = cur_group_idx / distance[i];
rec_map.recv_block_start[i] = group_block_start[recv_block_start * distance[i]]; rec_map.recv_block_start[i] = group_block_start[static_cast<size_t>(recv_block_start) * distance[i]];
int recv_block_len = 0; int recv_block_len = 0;
// accumulate block len // accumulate block len
for (int j = 0; j < distance[i]; ++j) { for (int j = 0; j < distance[i]; ++j) {
...@@ -164,7 +164,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) { ...@@ -164,7 +164,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
rec_map.recv_block_len[i] = recv_block_len; rec_map.recv_block_len[i] = recv_block_len;
// get send block information // get send block information
const int send_block_start = (cur_group_idx + dir * distance[i]) / distance[i]; const int send_block_start = (cur_group_idx + dir * distance[i]) / distance[i];
rec_map.send_block_start[i] = group_block_start[send_block_start * distance[i]]; rec_map.send_block_start[i] = group_block_start[static_cast<size_t>(send_block_start) * distance[i]];
int send_block_len = 0; int send_block_len = 0;
// accumulate block len // accumulate block len
for (int j = 0; j < distance[i]; ++j) { for (int j = 0; j < distance[i]; ++j) {
......
...@@ -34,7 +34,7 @@ class BinaryLogloss: public ObjectiveFunction { ...@@ -34,7 +34,7 @@ class BinaryLogloss: public ObjectiveFunction {
} }
is_pos_ = is_pos; is_pos_ = is_pos;
if (is_pos_ == nullptr) { if (is_pos_ == nullptr) {
is_pos_ = [](label_t label) {return label > 0; }; is_pos_ = [](label_t label) { return label > 0; };
} }
} }
......
...@@ -121,7 +121,7 @@ class MulticlassSoftmax: public ObjectiveFunction { ...@@ -121,7 +121,7 @@ class MulticlassSoftmax: public ObjectiveFunction {
if (label_int_[i] == k) { if (label_int_[i] == k) {
gradients[idx] = static_cast<score_t>((p - 1.0f) * weights_[i]); gradients[idx] = static_cast<score_t>((p - 1.0f) * weights_[i]);
} else { } else {
gradients[idx] = static_cast<score_t>((p) * weights_[i]); gradients[idx] = static_cast<score_t>(p * weights_[i]);
} }
hessians[idx] = static_cast<score_t>((factor_ * p * (1.0f - p))* weights_[i]); hessians[idx] = static_cast<score_t>((factor_ * p * (1.0f - p))* weights_[i]);
} }
......
...@@ -24,7 +24,7 @@ namespace LightGBM { ...@@ -24,7 +24,7 @@ namespace LightGBM {
for (data_size_t i = 0; i < cnt_data; ++i) { \ for (data_size_t i = 0; i < cnt_data; ++i) { \
ref_data[i] = data_reader(i); \ ref_data[i] = data_reader(i); \
} \ } \
const double float_pos = (1.0f - alpha) * cnt_data; \ const double float_pos = static_cast<double>(1.0 - alpha) * cnt_data; \
const data_size_t pos = static_cast<data_size_t>(float_pos); \ const data_size_t pos = static_cast<data_size_t>(float_pos); \
if (pos < 1) { \ if (pos < 1) { \
return ref_data[ArrayArgs<T>::ArgMax(ref_data)]; \ return ref_data[ArrayArgs<T>::ArgMax(ref_data)]; \
...@@ -135,7 +135,7 @@ class RegressionL2loss: public ObjectiveFunction { ...@@ -135,7 +135,7 @@ class RegressionL2loss: public ObjectiveFunction {
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<score_t>((score[i] - label_[i]) * weights_[i]); gradients[i] = static_cast<score_t>(static_cast<score_t>((score[i] - label_[i])) * weights_[i]);
hessians[i] = static_cast<score_t>(weights_[i]); hessians[i] = static_cast<score_t>(weights_[i]);
} }
} }
...@@ -176,7 +176,7 @@ class RegressionL2loss: public ObjectiveFunction { ...@@ -176,7 +176,7 @@ class RegressionL2loss: public ObjectiveFunction {
if (weights_ != nullptr) { if (weights_ != nullptr) {
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_) #pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i]; suml += static_cast<double>(label_[i]) * weights_[i];
sumw += weights_[i]; sumw += weights_[i];
} }
} else { } else {
...@@ -330,7 +330,7 @@ class RegressionHuberLoss: public RegressionL2loss { ...@@ -330,7 +330,7 @@ class RegressionHuberLoss: public RegressionL2loss {
if (std::abs(diff) <= alpha_) { if (std::abs(diff) <= alpha_) {
gradients[i] = static_cast<score_t>(diff * weights_[i]); gradients[i] = static_cast<score_t>(diff * weights_[i]);
} else { } else {
gradients[i] = static_cast<score_t>(Common::Sign(diff) * weights_[i] * alpha_); gradients[i] = static_cast<score_t>(Common::Sign(diff) * static_cast<score_t>(weights_[i]) * alpha_);
} }
hessians[i] = static_cast<score_t>(weights_[i]); hessians[i] = static_cast<score_t>(weights_[i]);
} }
......
...@@ -117,7 +117,7 @@ class CrossEntropy: public ObjectiveFunction { ...@@ -117,7 +117,7 @@ class CrossEntropy: public ObjectiveFunction {
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_) #pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i]; suml += static_cast<double>(label_[i]) * weights_[i];
sumw += weights_[i]; sumw += weights_[i];
} }
} else { } else {
...@@ -247,7 +247,7 @@ class CrossEntropyLambda: public ObjectiveFunction { ...@@ -247,7 +247,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_) #pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i]; suml += static_cast<double>(label_[i]) * weights_[i];
sumw += weights_[i]; sumw += weights_[i];
} }
} else { } else {
......
...@@ -1045,19 +1045,6 @@ def test_multiple_eval_metrics(): ...@@ -1045,19 +1045,6 @@ def test_multiple_eval_metrics():
assert 'binary_logloss' in gbm.evals_result_['training'] assert 'binary_logloss' in gbm.evals_result_['training']
def test_inf_handle():
nrows = 100
ncols = 10
X = np.random.randn(nrows, ncols)
y = np.random.randn(nrows) + np.full(nrows, 1e30)
weight = np.full(nrows, 1e10)
params = {'n_estimators': 20, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
'callbacks': [lgb.early_stopping(5)]}
gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf)
def test_nan_handle(): def test_nan_handle():
nrows = 100 nrows = 100
ncols = 10 ncols = 10
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment