Unverified Commit 5a80b788 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

introduced specific CHECKs (#2849)

parent 8d90bbe3
......@@ -513,7 +513,7 @@ inline void Tree::PredictContrib(const double* feature_values, int num_features,
output[num_features] += ExpectedValue();
// Run the recursion with preallocated space for the unique path data
if (num_leaves_ > 1) {
CHECK(max_depth_ >= 0);
CHECK_GE(max_depth_, 0);
const int max_path_len = max_depth_ + 1;
std::vector<PathElement> unique_path_data(max_path_len*(max_path_len + 1) / 2);
TreeSHAP(feature_values, output, 0, 0, unique_path_data.data(), 1, 1, -1);
......
......@@ -28,6 +28,30 @@ namespace LightGBM {
" at %s, line %d .\n", __FILE__, __LINE__);
#endif
// Comparison-specific variants of CHECK(): each expands to the generic
// CHECK() with the corresponding relational operator applied to its two
// arguments. Both operands are parenthesized so that expression arguments
// (e.g. `a + b`) keep their intended precedence inside the comparison.
// Every macro is wrapped in an #ifndef guard so a pre-existing definition
// (e.g. supplied by another logging library) takes precedence and no
// redefinition warning is emitted.
#ifndef CHECK_EQ
#define CHECK_EQ(a, b) CHECK((a) == (b))
#endif
#ifndef CHECK_NE
#define CHECK_NE(a, b) CHECK((a) != (b))
#endif
#ifndef CHECK_GE
#define CHECK_GE(a, b) CHECK((a) >= (b))
#endif
#ifndef CHECK_LE
#define CHECK_LE(a, b) CHECK((a) <= (b))
#endif
#ifndef CHECK_GT
#define CHECK_GT(a, b) CHECK((a) > (b))
#endif
#ifndef CHECK_LT
#define CHECK_LT(a, b) CHECK((a) < (b))
#endif
#ifndef CHECK_NOTNULL
#define CHECK_NOTNULL(pointer) \
if ((pointer) == nullptr) LightGBM::Log::Fatal(#pointer " Can't be NULL at %s, line %d .\n", __FILE__, __LINE__);
......
......@@ -43,8 +43,8 @@ class Predictor {
"none", LightGBM::PredictionEarlyStopConfig());
if (early_stop && !boosting->NeedAccuratePrediction()) {
PredictionEarlyStopConfig pred_early_stop_config;
CHECK(early_stop_freq > 0);
CHECK(early_stop_margin >= 0);
CHECK_GT(early_stop_freq, 0);
CHECK_GE(early_stop_margin, 0);
pred_early_stop_config.margin_threshold = early_stop_margin;
pred_early_stop_config.round_period = early_stop_freq;
if (boosting->NumberOfClasses() == 1) {
......
......@@ -264,9 +264,9 @@ void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
}
void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) {
CHECK(tree_leaf_prediction.size() > 0);
CHECK(static_cast<size_t>(num_data_) == tree_leaf_prediction.size());
CHECK(static_cast<size_t>(models_.size()) == tree_leaf_prediction[0].size());
CHECK_GT(tree_leaf_prediction.size(), 0);
CHECK_EQ(static_cast<size_t>(num_data_), tree_leaf_prediction.size());
CHECK_EQ(static_cast<size_t>(models_.size()), tree_leaf_prediction[0].size());
int num_iterations = static_cast<int>(models_.size() / num_tree_per_iteration_);
std::vector<int> leaf_pred(num_data_);
for (int iter = 0; iter < num_iterations; ++iter) {
......
......@@ -593,7 +593,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) {
#ifdef DEBUG
CHECK(models_[iter]->split_feature(split_idx) >= 0);
CHECK_GE(models_[iter]->split_feature(split_idx), 0);
#endif
feature_importances[models_[iter]->split_feature(split_idx)] += 1.0;
}
......@@ -604,7 +604,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) {
#ifdef DEBUG
CHECK(models_[iter]->split_feature(split_idx) >= 0);
CHECK_GE(models_[iter]->split_feature(split_idx), 0);
#endif
feature_importances[models_[iter]->split_feature(split_idx)] += models_[iter]->split_gain(split_idx);
}
......
......@@ -1050,7 +1050,7 @@ int LGBM_DatasetGetSubset(
omp_set_num_threads(config.num_threads);
}
auto full_dataset = reinterpret_cast<const Dataset*>(handle);
CHECK(num_used_row_indices > 0);
CHECK_GT(num_used_row_indices, 0);
const int32_t lower = 0;
const int32_t upper = full_dataset->num_data() - 1;
Common::CheckElementsIntervalClosed(used_row_indices, lower, upper, num_used_row_indices, "Used indices of subset");
......
......@@ -80,7 +80,7 @@ namespace LightGBM {
int num_distinct_values, int max_bin,
size_t total_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound;
CHECK(max_bin > 0);
CHECK_GT(max_bin, 0);
if (num_distinct_values <= max_bin) {
bin_upper_bound.clear();
int cur_cnt_inbin = 0;
......@@ -514,7 +514,7 @@ namespace LightGBM {
static_cast<uint32_t>(ArrayArgs<int>::ArgMax(cnt_in_bin));
if (bin_type_ == BinType::CategoricalBin) {
if (most_freq_bin_ == 0) {
CHECK(num_bin_ > 1);
CHECK_GT(num_bin_, 1);
// FIXME: how to enable `most_freq_bin_ = 0` for categorical features
most_freq_bin_ = 1;
}
......
......@@ -28,7 +28,7 @@ Dataset::Dataset() {
}
Dataset::Dataset(data_size_t num_data) {
CHECK(num_data > 0);
CHECK_GT(num_data, 0);
data_filename_ = "noname";
num_data_ = num_data;
metadata_.Init(num_data_, NO_SPECIFIC, NO_SPECIFIC);
......@@ -403,10 +403,10 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
}
}
if (!io_config.max_bin_by_feature.empty()) {
CHECK(static_cast<size_t>(num_total_features_) ==
CHECK_EQ(static_cast<size_t>(num_total_features_),
io_config.max_bin_by_feature.size());
CHECK(*(std::min_element(io_config.max_bin_by_feature.begin(),
io_config.max_bin_by_feature.end())) > 1);
CHECK_GT(*(std::min_element(io_config.max_bin_by_feature.begin(),
io_config.max_bin_by_feature.end())), 1);
max_bin_by_feature_.resize(num_total_features_);
max_bin_by_feature_.assign(io_config.max_bin_by_feature.begin(),
io_config.max_bin_by_feature.end());
......
......@@ -390,8 +390,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
mem_ptr += sizeof(int) * (dataset->num_groups_);
if (!config_.max_bin_by_feature.empty()) {
CHECK(static_cast<size_t>(dataset->num_total_features_) == config_.max_bin_by_feature.size());
CHECK(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())) > 1);
CHECK_EQ(static_cast<size_t>(dataset->num_total_features_), config_.max_bin_by_feature.size());
CHECK_GT(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())), 1);
dataset->max_bin_by_feature_.resize(dataset->num_total_features_);
dataset->max_bin_by_feature_.assign(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end());
} else {
......@@ -542,8 +542,8 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
}
}
if (!config_.max_bin_by_feature.empty()) {
CHECK(static_cast<size_t>(num_col) == config_.max_bin_by_feature.size());
CHECK(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())) > 1);
CHECK_EQ(static_cast<size_t>(num_col), config_.max_bin_by_feature.size());
CHECK_GT(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())), 1);
}
// get forced split
......@@ -850,12 +850,12 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
dataset->num_total_features_ = Network::GlobalSyncUpByMax(dataset->num_total_features_);
}
if (!feature_names_.empty()) {
CHECK(dataset->num_total_features_ == static_cast<int>(feature_names_.size()));
CHECK_EQ(dataset->num_total_features_, static_cast<int>(feature_names_.size()));
}
if (!config_.max_bin_by_feature.empty()) {
CHECK(static_cast<size_t>(dataset->num_total_features_) == config_.max_bin_by_feature.size());
CHECK(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())) > 1);
CHECK_EQ(static_cast<size_t>(dataset->num_total_features_), config_.max_bin_by_feature.size());
CHECK_GT(*(std::min_element(config_.max_bin_by_feature.begin(), config_.max_bin_by_feature.end())), 1);
}
// get forced split
......
......@@ -25,7 +25,7 @@ void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
}
} else {
for (size_t i = 0; i < eval_at->size(); ++i) {
CHECK(ref_eval_at[i] > 0);
CHECK_GT(ref_eval_at[i], 0);
}
}
}
......
......@@ -271,7 +271,7 @@ class GammaMetric : public RegressionMetric<GammaMetric> {
}
inline static void CheckLabel(label_t label) {
CHECK(label > 0);
CHECK_GT(label, 0);
}
};
......@@ -293,7 +293,7 @@ class GammaDevianceMetric : public RegressionMetric<GammaDevianceMetric> {
return sum_loss * 2;
}
inline static void CheckLabel(label_t label) {
CHECK(label > 0);
CHECK_GT(label, 0);
}
};
......
......@@ -743,7 +743,7 @@ class HistogramPool {
void Reset(int cache_size, int total_size) {
cache_size_ = cache_size;
// at least need 2 bucket to store smaller leaf and larger leaf
CHECK(cache_size_ >= 2);
CHECK_GE(cache_size_, 2);
total_size_ = total_size;
if (cache_size_ > total_size_) {
cache_size_ = total_size_;
......
......@@ -95,7 +95,7 @@ void SerialTreeLearner::GetMultiValBin(const Dataset* dataset, bool is_first_tim
void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
train_data_ = train_data;
num_data_ = train_data_->num_data();
CHECK(num_features_ == train_data_->num_features());
CHECK_EQ(num_features_, train_data_->num_features());
// initialize splits for leaf
smaller_leaf_splits_->ResetNumData(num_data_);
......@@ -247,7 +247,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for (int i = 0; i < omp_loop_size; ++i) {
int used_feature = valid_feature_indices_[used_feature_indices_[i]];
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature);
CHECK(inner_feature_index >= 0);
CHECK_GE(inner_feature_index, 0);
ret[inner_feature_index] = 1;
}
} else if (used_feature_indices_.size() <= 0) {
......@@ -259,7 +259,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for (int i = 0; i < omp_loop_size; ++i) {
int used_feature = valid_feature_indices_[sampled_indices[i]];
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature);
CHECK(inner_feature_index >= 0);
CHECK_GE(inner_feature_index, 0);
ret[inner_feature_index] = 1;
}
} else {
......@@ -271,7 +271,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for (int i = 0; i < omp_loop_size; ++i) {
int used_feature = valid_feature_indices_[used_feature_indices_[sampled_indices[i]]];
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature);
CHECK(inner_feature_index >= 0);
CHECK_GE(inner_feature_index, 0);
ret[inner_feature_index] = 1;
}
}
......@@ -706,11 +706,11 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri
// init the leaves that used on next iteration
if (best_split_info.left_count < best_split_info.right_count) {
CHECK(best_split_info.left_count > 0);
CHECK_GT(best_split_info.left_count, 0);
smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
larger_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
} else {
CHECK(best_split_info.right_count > 0);
CHECK_GT(best_split_info.right_count, 0);
smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
}
......@@ -727,7 +727,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
CHECK(tree->num_leaves() <= data_partition_->num_leaves());
const data_size_t* bag_mapper = nullptr;
if (total_num_data != num_data_) {
CHECK(bag_cnt == num_data_);
CHECK_EQ(bag_cnt, num_data_);
bag_mapper = bag_indices;
}
std::vector<int> n_nozeroworker_perleaf(tree->num_leaves(), 1);
......@@ -742,7 +742,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
const double new_output = obj->RenewTreeOutput(output, residual_getter, index_mapper, bag_mapper, cnt_leaf_data);
tree->SetLeafOutput(i, new_output);
} else {
CHECK(num_machines > 1);
CHECK_GT(num_machines, 1);
tree->SetLeafOutput(i, 0.0);
n_nozeroworker_perleaf[i] = 0;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment