Commit 0a7a4080 authored by Guolin Ke
Browse files

change kZeroThreshold to 1e-35f

parent 0481762e
...@@ -19,7 +19,7 @@ const score_t kMinScore = -std::numeric_limits<score_t>::infinity(); ...@@ -19,7 +19,7 @@ const score_t kMinScore = -std::numeric_limits<score_t>::infinity();
const score_t kEpsilon = 1e-15f; const score_t kEpsilon = 1e-15f;
const double kZeroAsMissingValueRange = 1e-20f; const double kZeroThreshold = 1e-35f;
using ReduceFunction = std::function<void(const char*, char*, int)>; using ReduceFunction = std::function<void(const char*, char*, int)>;
......
...@@ -171,7 +171,7 @@ public: ...@@ -171,7 +171,7 @@ public:
std::string ToIfElse(int index, bool is_predict_leaf_index) const; std::string ToIfElse(int index, bool is_predict_leaf_index) const;
inline static bool IsZero(double fval) { inline static bool IsZero(double fval) {
if (fval > -kZeroAsMissingValueRange && fval <= kZeroAsMissingValueRange) { if (fval > -kZeroThreshold && fval <= kZeroThreshold) {
return true; return true;
} else { } else {
return false; return false;
......
...@@ -496,7 +496,7 @@ int LGBM_DatasetCreateFromMat(const void* data, ...@@ -496,7 +496,7 @@ int LGBM_DatasetCreateFromMat(const void* data,
auto idx = sample_indices[i]; auto idx = sample_indices[i];
auto row = get_row_fun(static_cast<int>(idx)); auto row = get_row_fun(static_cast<int>(idx));
for (size_t j = 0; j < row.size(); ++j) { for (size_t j = 0; j < row.size(); ++j) {
if (std::fabs(row[j]) > kEpsilon || std::isnan(row[j])) { if (std::fabs(row[j]) > kZeroThreshold || std::isnan(row[j])) {
sample_values[j].emplace_back(row[j]); sample_values[j].emplace_back(row[j]);
sample_idx[j].emplace_back(static_cast<int>(i)); sample_idx[j].emplace_back(static_cast<int>(i));
} }
...@@ -565,7 +565,7 @@ int LGBM_DatasetCreateFromCSR(const void* indptr, ...@@ -565,7 +565,7 @@ int LGBM_DatasetCreateFromCSR(const void* indptr,
sample_values.resize(inner_data.first + 1); sample_values.resize(inner_data.first + 1);
sample_idx.resize(inner_data.first + 1); sample_idx.resize(inner_data.first + 1);
} }
if (std::fabs(inner_data.second) > kEpsilon || std::isnan(inner_data.second)) { if (std::fabs(inner_data.second) > kZeroThreshold || std::isnan(inner_data.second)) {
sample_values[inner_data.first].emplace_back(inner_data.second); sample_values[inner_data.first].emplace_back(inner_data.second);
sample_idx[inner_data.first].emplace_back(static_cast<int>(i)); sample_idx[inner_data.first].emplace_back(static_cast<int>(i));
} }
...@@ -633,7 +633,7 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr, ...@@ -633,7 +633,7 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i); CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
for (int j = 0; j < sample_cnt; j++) { for (int j = 0; j < sample_cnt; j++) {
auto val = col_it.Get(sample_indices[j]); auto val = col_it.Get(sample_indices[j]);
if (std::fabs(val) > kEpsilon || std::isnan(val)) { if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
sample_values[i].emplace_back(val); sample_values[i].emplace_back(val);
sample_idx[i].emplace_back(j); sample_idx[i].emplace_back(j);
} }
...@@ -1090,7 +1090,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle, ...@@ -1090,7 +1090,7 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle,
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
for (int j = 0; j < ncol; ++j) { for (int j = 0; j < ncol; ++j) {
auto val = iterators[tid][j].Get(i); auto val = iterators[tid][j].Get(i);
if (std::fabs(val) > kEpsilon || std::isnan(val)) { if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
one_row.emplace_back(j, val); one_row.emplace_back(j, val);
} }
} }
...@@ -1302,7 +1302,7 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d ...@@ -1302,7 +1302,7 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d
auto raw_values = inner_function(row_idx); auto raw_values = inner_function(row_idx);
std::vector<std::pair<int, double>> ret; std::vector<std::pair<int, double>> ret;
for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) { for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
if (std::fabs(raw_values[i]) > kEpsilon || std::isnan(raw_values[i])) { if (std::fabs(raw_values[i]) > kZeroThreshold || std::isnan(raw_values[i])) {
ret.emplace_back(i, raw_values[i]); ret.emplace_back(i, raw_values[i]);
} }
} }
......
...@@ -154,9 +154,9 @@ namespace LightGBM { ...@@ -154,9 +154,9 @@ namespace LightGBM {
int cnt_zero = 0; int cnt_zero = 0;
int right_cnt_data = 0; int right_cnt_data = 0;
for (int i = 0; i < num_distinct_values; ++i) { for (int i = 0; i < num_distinct_values; ++i) {
if (distinct_values[i] <= -kZeroAsMissingValueRange) { if (distinct_values[i] <= -kZeroThreshold) {
left_cnt_data += counts[i]; left_cnt_data += counts[i];
} else if (distinct_values[i] > kZeroAsMissingValueRange) { } else if (distinct_values[i] > kZeroThreshold) {
right_cnt_data += counts[i]; right_cnt_data += counts[i];
} else { } else {
cnt_zero += counts[i]; cnt_zero += counts[i];
...@@ -165,7 +165,7 @@ namespace LightGBM { ...@@ -165,7 +165,7 @@ namespace LightGBM {
int left_cnt = -1; int left_cnt = -1;
for (int i = 0; i < num_distinct_values; ++i) { for (int i = 0; i < num_distinct_values; ++i) {
if (distinct_values[i] > -kZeroAsMissingValueRange) { if (distinct_values[i] > -kZeroThreshold) {
left_cnt = i; left_cnt = i;
break; break;
} }
...@@ -179,12 +179,12 @@ namespace LightGBM { ...@@ -179,12 +179,12 @@ namespace LightGBM {
int left_max_bin = static_cast<int>(static_cast<double>(left_cnt_data) / (total_sample_cnt - cnt_zero) * (max_bin - 1)); int left_max_bin = static_cast<int>(static_cast<double>(left_cnt_data) / (total_sample_cnt - cnt_zero) * (max_bin - 1));
left_max_bin = std::max(1, left_max_bin); left_max_bin = std::max(1, left_max_bin);
bin_upper_bound = GreedyFindBin(distinct_values, counts, left_cnt, left_max_bin, left_cnt_data, min_data_in_bin); bin_upper_bound = GreedyFindBin(distinct_values, counts, left_cnt, left_max_bin, left_cnt_data, min_data_in_bin);
bin_upper_bound.back() = -kZeroAsMissingValueRange; bin_upper_bound.back() = -kZeroThreshold;
} }
int right_start = -1; int right_start = -1;
for (int i = left_cnt; i < num_distinct_values; ++i) { for (int i = left_cnt; i < num_distinct_values; ++i) {
if (distinct_values[i] > kZeroAsMissingValueRange) { if (distinct_values[i] > kZeroThreshold) {
right_start = i; right_start = i;
break; break;
} }
...@@ -195,7 +195,7 @@ namespace LightGBM { ...@@ -195,7 +195,7 @@ namespace LightGBM {
CHECK(right_max_bin > 0); CHECK(right_max_bin > 0);
auto right_bounds = GreedyFindBin(distinct_values + right_start, counts + right_start, auto right_bounds = GreedyFindBin(distinct_values + right_start, counts + right_start,
num_distinct_values - right_start, right_max_bin, right_cnt_data, min_data_in_bin); num_distinct_values - right_start, right_max_bin, right_cnt_data, min_data_in_bin);
bin_upper_bound.push_back(kZeroAsMissingValueRange); bin_upper_bound.push_back(kZeroThreshold);
bin_upper_bound.insert(bin_upper_bound.end(), right_bounds.begin(), right_bounds.end()); bin_upper_bound.insert(bin_upper_bound.end(), right_bounds.begin(), right_bounds.end());
} else { } else {
bin_upper_bound.push_back(std::numeric_limits<double>::infinity()); bin_upper_bound.push_back(std::numeric_limits<double>::infinity());
......
...@@ -760,7 +760,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, ...@@ -760,7 +760,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
sample_values.resize(inner_data.first + 1); sample_values.resize(inner_data.first + 1);
sample_indices.resize(inner_data.first + 1); sample_indices.resize(inner_data.first + 1);
} }
if (std::fabs(inner_data.second) > kEpsilon || std::isnan(inner_data.second)) { if (std::fabs(inner_data.second) > kZeroThreshold || std::isnan(inner_data.second)) {
sample_values[inner_data.first].emplace_back(inner_data.second); sample_values[inner_data.first].emplace_back(inner_data.second);
sample_indices[inner_data.first].emplace_back(i); sample_indices[inner_data.first].emplace_back(i);
} }
......
...@@ -29,7 +29,7 @@ public: ...@@ -29,7 +29,7 @@ public:
*out_label = val; *out_label = val;
bias = -1; bias = -1;
} }
else if (std::fabs(val) > kEpsilon || std::isnan(val)) { else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
out_features->emplace_back(idx + bias, val); out_features->emplace_back(idx + bias, val);
} }
++idx; ++idx;
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
if (idx == label_idx_) { if (idx == label_idx_) {
*out_label = val; *out_label = val;
bias = -1; bias = -1;
} else if (std::fabs(val) > kEpsilon || std::isnan(val)) { } else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
out_features->emplace_back(idx + bias, val); out_features->emplace_back(idx + bias, val);
} }
++idx; ++idx;
......
...@@ -317,7 +317,7 @@ std::string Tree::NumericalDecisionIfElse(int node) const { ...@@ -317,7 +317,7 @@ std::string Tree::NumericalDecisionIfElse(int node) const {
std::stringstream str_buf; std::stringstream str_buf;
uint8_t missing_type = GetMissingType(decision_type_[node]); uint8_t missing_type = GetMissingType(decision_type_[node]);
bool default_left = GetDecisionType(decision_type_[node], kDefaultLeftMask); bool default_left = GetDecisionType(decision_type_[node], kDefaultLeftMask);
if (missing_type == 0 || (missing_type == 1 && default_left && kZeroAsMissingValueRange < threshold_[node])) { if (missing_type == 0 || (missing_type == 1 && default_left && kZeroThreshold < threshold_[node])) {
str_buf << "if (fval <= " << threshold_[node] << ") {"; str_buf << "if (fval <= " << threshold_[node] << ") {";
} else if (missing_type == 1) { } else if (missing_type == 1) {
if (default_left) { if (default_left) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment