Unverified commit 0df10887, authored by James Lamb, committed by GitHub
Browse files

[ci] [c++] use 'pre-commit' to run 'cpplint', upgrade to 'cpplint' 2.0.2 (#7002)

* [ci] [c++] use 'pre-commit' to run 'cpplint', upgrade to 'cpplint' 2.0.2

* remove bashisms

* one more pipefail use

* another pipefail
parent d9bdda6c
#!/bin/bash
set -e -E -u -o pipefail
echo "running cpplint"
cpplint \
--filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length \
--recursive ./src ./include ./R-package ./swig ./tests \
|| exit 1
echo "done running cpplint"
set -e -u
echo "checking that all OpenMP pragmas specify num_threads()"
get_omp_pragmas_without_num_threads() {
......@@ -28,11 +21,11 @@ get_omp_pragmas_without_num_threads() {
# consider this a failure and stop execution of the script.
#
# ref: https://www.gnu.org/software/grep/manual/html_node/Exit-Status.html
set +e +o pipefail
set +e
PROBLEMATIC_LINES=$(
get_omp_pragmas_without_num_threads
)
set -e -o pipefail
set -e
if test "${PROBLEMATIC_LINES}" != ""; then
get_omp_pragmas_without_num_threads
echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those."
......
......@@ -105,7 +105,6 @@ if [[ $TASK == "lint" ]]; then
conda create -q -y -n "${CONDA_ENV}" \
"${CONDA_PYTHON_REQUIREMENT}" \
'biome>=1.9.3' \
'cpplint>=1.6.0' \
'matplotlib-base>=3.9.1' \
'mypy>=1.11.1' \
'pre-commit>=3.8.0' \
......@@ -118,8 +117,6 @@ if [[ $TASK == "lint" ]]; then
bash ./.ci/run-pre-commit-mypy.sh || exit 1
echo "Linting R code"
Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1
echo "Linting C++ code"
bash ./.ci/lint-cpp.sh || exit 1
echo "Linting JavaScript code"
bash ./.ci/lint-js.sh || exit 1
exit 0
......
......@@ -22,6 +22,22 @@ repos:
hooks:
- id: cmakelint
args: ["--linelength=120", "--filter=-convention/filename,-package/stdargs,-readability/wonkycase"]
- repo: https://github.com/cpplint/cpplint
rev: '2.0.2'
hooks:
- id: cpplint
args:
- --recursive
- --filter=-build/c++11,-build/include_subdir,-build/include_what_you_use,-build/header_guard,-whitespace/indent_namespace,-whitespace/line_length
- repo: local
hooks:
- id: check-omp-pragmas
name: check-omp-pragmas
entry: sh
args:
- .ci/check-omp-pragmas.sh
language: system
pass_filenames: false
- repo: https://github.com/adrienverge/yamllint
rev: v1.37.1
hooks:
......
......@@ -554,9 +554,13 @@ class Dataset {
}
inline void FinishOneRow(int tid, data_size_t row_idx, const std::vector<bool>& is_feature_added) {
if (is_finish_load_) { return; }
if (is_finish_load_) {
return;
}
for (auto fidx : feature_need_push_zeros_) {
if (is_feature_added[fidx]) { continue; }
if (is_feature_added[fidx]) {
continue;
}
const int group = feature2group_[fidx];
const int sub_feature = feature2subfeature_[fidx];
feature_groups_[group]->PushData(tid, sub_feature, row_idx, 0.0f);
......@@ -587,10 +591,14 @@ class Dataset {
}
inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<std::pair<int, double>>& feature_values) {
if (is_finish_load_) { return; }
if (is_finish_load_) {
return;
}
std::vector<bool> is_feature_added(num_features_, false);
for (auto& inner_data : feature_values) {
if (inner_data.first >= num_total_features_) { continue; }
if (inner_data.first >= num_total_features_) {
continue;
}
int feature_idx = used_feature_map_[inner_data.first];
if (feature_idx >= 0) {
is_feature_added[feature_idx] = true;
......
......@@ -112,17 +112,33 @@ class ArrayArgs {
VAL_T v = ref[end - 1];
for (;;) {
while (ref[++i] > v) {}
while (v > ref[--j]) { if (j == start) { break; } }
if (i >= j) { break; }
while (v > ref[--j]) {
if (j == start) {
break;
}
}
if (i >= j) {
break;
}
std::swap(ref[i], ref[j]);
if (ref[i] == v) { p++; std::swap(ref[p], ref[i]); }
if (v == ref[j]) { q--; std::swap(ref[j], ref[q]); }
if (ref[i] == v) {
p++;
std::swap(ref[p], ref[i]);
}
if (v == ref[j]) {
q--;
std::swap(ref[j], ref[q]);
}
}
std::swap(ref[i], ref[end - 1]);
j = i - 1;
i = i + 1;
for (int k = start; k <= p; k++, j--) { std::swap(ref[k], ref[j]); }
for (int k = end - 2; k >= q; k--, i++) { std::swap(ref[i], ref[k]); }
for (int k = start; k <= p; k++, j--) {
std::swap(ref[k], ref[j]);
}
for (int k = end - 2; k >= q; k--, i++) {
std::swap(ref[i], ref[k]);
}
*l = j;
*r = i;
}
......
......@@ -315,9 +315,18 @@ inline static const char* Atof(const char* p, double* out) {
}
if (expon > 308) expon = 308;
// Calculate scaling factor.
while (expon >= 50) { scale *= 1E50; expon -= 50; }
while (expon >= 8) { scale *= 1E8; expon -= 8; }
while (expon > 0) { scale *= 10.0; expon -= 1; }
while (expon >= 50) {
scale *= 1E50;
expon -= 50;
}
while (expon >= 8) {
scale *= 1E8;
expon -= 8;
}
while (expon > 0) {
scale *= 10.0;
expon -= 1;
}
}
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
......@@ -713,7 +722,9 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
size_t mid = left + s;
size_t right = mid + s;
right = std::min(len, right);
if (mid >= right) { continue; }
if (mid >= right) {
continue;
}
std::copy(_First + left, _First + mid, buf + left);
std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred);
}
......
......@@ -62,9 +62,13 @@ class ThreadExceptionHelper {
}
void CaptureException() {
  // Store the exception currently being handled (std::current_exception())
  // so the owning helper can deal with it after the worker finishes.
  // Only the FIRST exception is kept; later calls are no-ops.
  //
  // NOTE(review): the scraped diff showed each guard twice (pre- and
  // post-change image); exactly one copy of each is kept here.
  if (ex_ptr_ != nullptr) {
    return;
  }
  std::unique_lock<std::mutex> guard(lock_);
  // Re-check under the lock: another thread may have stored an exception
  // between the unlocked fast-path check above and acquiring the mutex.
  if (ex_ptr_ != nullptr) {
    return;
  }
  ex_ptr_ = std::current_exception();
}
......
......@@ -124,7 +124,9 @@ class TextReader {
++i;
++total_cnt;
// skip end of line
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
++i;
}
last_i = i;
} else {
++i;
......@@ -284,7 +286,9 @@ class TextReader {
++i;
++total_cnt;
// skip end of line
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
++i;
}
last_i = i;
} else {
++i;
......
......@@ -121,7 +121,9 @@ void Application::LoadData() {
if (config_.is_provide_training_metric) {
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(train_data_->metadata(), train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
......@@ -149,7 +151,9 @@ void Application::LoadData() {
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(valid_datas_.back()->metadata(),
valid_datas_.back()->num_data());
valid_metrics_.back().push_back(std::move(metric));
......
......@@ -210,7 +210,9 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
if (early_stopping_round_ > 0) {
auto num_metrics = valid_metrics.size();
if (es_first_metric_only_) { num_metrics = 1; }
if (es_first_metric_only_) {
num_metrics = 1;
}
best_iter_.emplace_back(num_metrics, 0);
best_score_.emplace_back(num_metrics, kMinScore);
best_msg_.emplace_back(num_metrics);
......@@ -452,7 +454,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
}
void GBDT::RollbackOneIter() {
if (iter_ <= 0) { return; }
if (iter_ <= 0) {
return;
}
// reset score
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
......@@ -588,7 +592,9 @@ std::string GBDT::OutputMetric(int iter) {
msg_buf << tmp_buf.str() << '\n';
}
}
if (es_first_metric_only_ && j > 0) { continue; }
if (es_first_metric_only_ && j > 0) {
continue;
}
if (ret.empty() && early_stopping_round_ > 0) {
auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
if (cur_score - best_score_[i][j] > early_stopping_min_delta_) {
......@@ -596,7 +602,9 @@ std::string GBDT::OutputMetric(int iter) {
best_iter_[i][j] = iter;
meet_early_stopping_pairs.emplace_back(i, j);
} else {
if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
if (iter - best_iter_[i][j] >= early_stopping_round_) {
ret = best_msg_[i][j];
}
}
}
}
......
......@@ -30,7 +30,9 @@ class GOSSStrategy : public SampleStrategy {
void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
bag_data_cnt_ = num_data_;
// not subsample for first iterations
if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
if (iter < static_cast<int>(1.0f / config_->learning_rate)) {
return;
}
auto left_cnt = bagging_runner_.Run<true>(
num_data_,
[=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
......
......@@ -182,7 +182,9 @@ class RF : public GBDT {
}
void RollbackOneIter() override {
if (iter_ <= 0) { return; }
if (iter_ <= 0) {
return;
}
int cur_iter = iter_ + num_init_iteration_ - 1;
// reset score
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
......
......@@ -218,7 +218,9 @@ class Booster {
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(
Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(train_data_->metadata(), train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
......@@ -394,7 +396,9 @@ class Booster {
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(valid_data->metadata(), valid_data->num_data());
valid_metrics_.back().push_back(std::move(metric));
}
......@@ -1603,7 +1607,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
int feature_idx = ret->InnerFeatureIndex(i);
if (feature_idx < 0) { continue; }
if (feature_idx < 0) {
continue;
}
int group = ret->Feature2Group(feature_idx);
int sub_feature = ret->Feture2SubFeature(feature_idx);
CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
......@@ -1614,7 +1620,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
auto pair = col_it.NextNonZero();
row_idx = pair.first;
// no more data
if (row_idx < 0) { break; }
if (row_idx < 0) {
break;
}
ret->PushOneData(tid, row_idx, group, feature_idx, sub_feature, pair.second);
}
} else {
......@@ -1838,7 +1846,9 @@ int LGBM_DatasetSetField(DatasetHandle handle,
} else if (type == C_API_DTYPE_FLOAT64) {
is_success = dataset->SetDoubleField(field_name, reinterpret_cast<const double*>(field_data), static_cast<int32_t>(num_element));
}
if (!is_success) { Log::Fatal("Input data type error or field not found"); }
if (!is_success) {
Log::Fatal("Input data type error or field not found");
}
API_END();
}
......@@ -1875,8 +1885,12 @@ int LGBM_DatasetGetField(DatasetHandle handle,
*out_type = C_API_DTYPE_FLOAT64;
is_success = true;
}
if (!is_success) { Log::Fatal("Field not found"); }
if (*out_ptr == nullptr) { *out_len = 0; }
if (!is_success) {
Log::Fatal("Field not found");
}
if (*out_ptr == nullptr) {
*out_len = 0;
}
API_END();
}
......
......@@ -131,7 +131,9 @@ namespace LightGBM {
upper_bounds[bin_cnt] = distinct_values[i];
++bin_cnt;
lower_bounds[bin_cnt] = distinct_values[i + 1];
if (bin_cnt >= max_bin - 1) { break; }
if (bin_cnt >= max_bin - 1) {
break;
}
cur_cnt_inbin = 0;
if (!is_big_count_value[i]) {
--rest_bin_cnt;
......
......@@ -665,7 +665,9 @@ Dataset* DatasetLoader::ConstructFromSampleData(double** sample_values,
std::vector<int> start(num_machines);
std::vector<int> len(num_machines);
int step = (num_total_features + num_machines - 1) / num_machines;
if (step < 1) { step = 1; }
if (step < 1) {
step = 1;
}
start[0] = 0;
for (int i = 0; i < num_machines - 1; ++i) {
......@@ -1168,7 +1170,9 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
std::vector<int> start(num_machines);
std::vector<int> len(num_machines);
int step = (dataset->num_total_features_ + num_machines - 1) / num_machines;
if (step < 1) { step = 1; }
if (step < 1) {
step = 1;
}
start[0] = 0;
for (int i = 0; i < num_machines - 1; ++i) {
......@@ -1284,7 +1288,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
std::vector<bool> is_feature_added(dataset->num_features_, false);
// push data
for (auto& inner_data : oneline_features) {
if (inner_data.first >= dataset->num_total_features_) { continue; }
if (inner_data.first >= dataset->num_total_features_) {
continue;
}
int feature_idx = dataset->used_feature_map_[inner_data.first];
if (feature_idx >= 0) {
is_feature_added[feature_idx] = true;
......@@ -1341,7 +1347,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
// push data
std::vector<bool> is_feature_added(dataset->num_features_, false);
for (auto& inner_data : oneline_features) {
if (inner_data.first >= dataset->num_total_features_) { continue; }
if (inner_data.first >= dataset->num_total_features_) {
continue;
}
int feature_idx = dataset->used_feature_map_[inner_data.first];
if (feature_idx >= 0) {
is_feature_added[feature_idx] = true;
......@@ -1414,7 +1422,9 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
std::vector<bool> is_feature_added(dataset->num_features_, false);
// push data
for (auto& inner_data : oneline_features) {
if (inner_data.first >= dataset->num_total_features_) { continue; }
if (inner_data.first >= dataset->num_total_features_) {
continue;
}
int feature_idx = dataset->used_feature_map_[inner_data.first];
if (feature_idx >= 0) {
is_feature_added[feature_idx] = true;
......
......@@ -56,7 +56,9 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
Log::Info("Using query id in data file, ignoring the additional query file");
query_boundaries_.clear();
}
if (!query_weights_.empty()) { query_weights_.clear(); }
if (!query_weights_.empty()) {
query_weights_.clear();
}
queries_ = std::vector<data_size_t>(num_data_, 0);
query_load_from_file_ = false;
}
......@@ -401,7 +403,9 @@ void Metadata::InsertInitScores(const double* init_scores, data_size_t start_ind
// Note that len here is row count, not num_init_score, so we compare against num_data
Log::Fatal("Inserted initial score data is too large for dataset");
}
if (init_score_.empty()) { init_score_.resize(num_init_score_); }
if (init_score_.empty()) {
init_score_.resize(num_init_score_);
}
int nclasses = num_init_score_classes();
......@@ -455,7 +459,9 @@ void Metadata::InsertLabels(const label_t* labels, data_size_t start_index, data
if (start_index + len > num_data_) {
Log::Fatal("Inserted label data is too large for dataset");
}
if (label_.empty()) { label_.resize(num_data_); }
if (label_.empty()) {
label_.resize(num_data_);
}
memcpy(label_.data() + start_index, labels, sizeof(label_t) * len);
......@@ -511,7 +517,9 @@ void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, da
if (start_index + len > num_weights_) {
Log::Fatal("Inserted weight data is too large for dataset");
}
if (weights_.empty()) { weights_.resize(num_weights_); }
if (weights_.empty()) {
weights_.resize(num_weights_);
}
memcpy(weights_.data() + start_index, weights, sizeof(label_t) * len);
......@@ -797,20 +805,26 @@ void Metadata::LoadFromMemory(const void* memory) {
num_queries_ = *(reinterpret_cast<const data_size_t*>(mem_ptr));
mem_ptr += VirtualFileWriter::AlignedSize(sizeof(num_queries_));
if (!label_.empty()) { label_.clear(); }
if (!label_.empty()) {
label_.clear();
}
label_ = std::vector<label_t>(num_data_);
std::memcpy(label_.data(), mem_ptr, sizeof(label_t) * num_data_);
mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_data_);
if (num_weights_ > 0) {
if (!weights_.empty()) { weights_.clear(); }
if (!weights_.empty()) {
weights_.clear();
}
weights_ = std::vector<label_t>(num_weights_);
std::memcpy(weights_.data(), mem_ptr, sizeof(label_t) * num_weights_);
mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_weights_);
weight_load_from_file_ = true;
}
if (num_queries_ > 0) {
if (!query_boundaries_.empty()) { query_boundaries_.clear(); }
if (!query_boundaries_.empty()) {
query_boundaries_.clear();
}
query_boundaries_ = std::vector<data_size_t>(num_queries_ + 1);
std::memcpy(query_boundaries_.data(), mem_ptr, sizeof(data_size_t) * (num_queries_ + 1));
mem_ptr += VirtualFileWriter::AlignedSize(sizeof(data_size_t) *
......
......@@ -31,7 +31,9 @@ void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
}
void DCGCalculator::DefaultLabelGain(std::vector<double>* label_gain) {
if (!label_gain->empty()) { return; }
if (!label_gain->empty()) {
return;
}
// label_gain = 2^i - 1, may overflow, so we use 31 here
const int max_label = 31;
label_gain->push_back(0.0f);
......@@ -60,7 +62,9 @@ double DCGCalculator::CalMaxDCGAtK(data_size_t k, const label_t* label, data_siz
}
int top_label = static_cast<int>(label_gain_.size()) - 1;
if (k > num_data) { k = num_data; }
if (k > num_data) {
k = num_data;
}
// start from top label, and accumulate DCG
for (data_size_t j = 0; j < k; ++j) {
while (top_label > 0 && label_cnt[top_label] <= 0) {
......@@ -90,7 +94,9 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
// calculate k Max DCG by one pass
for (size_t i = 0; i < ks.size(); ++i) {
data_size_t cur_k = ks[i];
if (cur_k > num_data) { cur_k = num_data; }
if (cur_k > num_data) {
cur_k = num_data;
}
for (data_size_t j = cur_left; j < cur_k; ++j) {
while (top_label > 0 && label_cnt[top_label] <= 0) {
top_label -= 1;
......@@ -121,7 +127,9 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* la
// calculate multi dcg by one pass
for (size_t i = 0; i < ks.size(); ++i) {
data_size_t cur_k = ks[i];
if (cur_k > num_data) { cur_k = num_data; }
if (cur_k > num_data) {
cur_k = num_data;
}
for (data_size_t j = cur_left; j < cur_k; ++j) {
data_size_t idx = sorted_idx[j];
cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
......
......@@ -86,7 +86,9 @@ class MapMetric:public Metric {
data_size_t cur_left = 0;
for (size_t i = 0; i < ks.size(); ++i) {
data_size_t cur_k = static_cast<data_size_t>(ks[i]);
if (cur_k > num_data) { cur_k = num_data; }
if (cur_k > num_data) {
cur_k = num_data;
}
for (data_size_t j = cur_left; j < cur_k; ++j) {
data_size_t idx = sorted_idx[j];
if (label[idx] > 0.5f) {
......
......@@ -68,7 +68,9 @@ RecursiveHalvingMap::RecursiveHalvingMap(int in_k, RecursiveHalvingNodeType _typ
RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
// construct all recursive halving map for all machines
int k = 0;
while ((1 << k) <= num_machines) { ++k; }
while ((1 << k) <= num_machines) {
++k;
}
// let 1 << k <= num_machines
--k;
// distance of each communication
......
......@@ -206,11 +206,17 @@ class LambdarankNDCG : public RankingObjective {
double sum_lambdas = 0.0;
// start accumulate lambdas by pairs that contain at least one document above truncation level
for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
if (score[sorted_idx[i]] == kMinScore) { continue; }
if (score[sorted_idx[i]] == kMinScore) {
continue;
}
for (data_size_t j = i + 1; j < cnt; ++j) {
if (score[sorted_idx[j]] == kMinScore) { continue; }
if (score[sorted_idx[j]] == kMinScore) {
continue;
}
// skip pairs with the same labels
if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }
if (label[sorted_idx[i]] == label[sorted_idx[j]]) {
continue;
}
data_size_t high_rank, low_rank;
if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
high_rank = i;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment