"...git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "c53ac33b50760129da1b60886283d4918a21d5e7"
Unverified commit 9b61166f, authored by Nikita Titov and committed by GitHub
Browse files

fixed cpplint errors about spaces and newlines (#2481)

parent 6036e07d
...@@ -71,8 +71,9 @@ namespace LightGBM { ...@@ -71,8 +71,9 @@ namespace LightGBM {
return true; return true;
} }
std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts, std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_cnt, int min_data_in_bin) { int num_distinct_values, int max_bin,
size_t total_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound; std::vector<double> bin_upper_bound;
CHECK(max_bin > 0); CHECK(max_bin > 0);
if (num_distinct_values <= max_bin) { if (num_distinct_values <= max_bin) {
...@@ -150,7 +151,9 @@ namespace LightGBM { ...@@ -150,7 +151,9 @@ namespace LightGBM {
} }
std::vector<double> FindBinWithPredefinedBin(const double* distinct_values, const int* counts, std::vector<double> FindBinWithPredefinedBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_sample_cnt, int min_data_in_bin, const std::vector<double>& forced_upper_bounds) { int num_distinct_values, int max_bin,
size_t total_sample_cnt, int min_data_in_bin,
const std::vector<double>& forced_upper_bounds) {
std::vector<double> bin_upper_bound; std::vector<double> bin_upper_bound;
// get list of distinct values // get list of distinct values
...@@ -246,8 +249,8 @@ namespace LightGBM { ...@@ -246,8 +249,8 @@ namespace LightGBM {
return bin_upper_bound; return bin_upper_bound;
} }
std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values, std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values,
int max_bin, size_t total_sample_cnt, int min_data_in_bin) { int max_bin, size_t total_sample_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound; std::vector<double> bin_upper_bound;
int left_cnt_data = 0; int left_cnt_data = 0;
int cnt_zero = 0; int cnt_zero = 0;
...@@ -305,7 +308,8 @@ namespace LightGBM { ...@@ -305,7 +308,8 @@ namespace LightGBM {
} }
std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values, std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values,
int max_bin, size_t total_sample_cnt, int min_data_in_bin, const std::vector<double>& forced_upper_bounds) { int max_bin, size_t total_sample_cnt, int min_data_in_bin,
const std::vector<double>& forced_upper_bounds) {
if (forced_upper_bounds.empty()) { if (forced_upper_bounds.empty()) {
return FindBinWithZeroAsOneBin(distinct_values, counts, num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin); return FindBinWithZeroAsOneBin(distinct_values, counts, num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
} else { } else {
...@@ -315,8 +319,9 @@ namespace LightGBM { ...@@ -315,8 +319,9 @@ namespace LightGBM {
} }
void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt, void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt,
int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type, bool use_missing, bool zero_as_missing, int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type,
const std::vector<double>& forced_upper_bounds) { bool use_missing, bool zero_as_missing,
const std::vector<double>& forced_upper_bounds) {
int na_cnt = 0; int na_cnt = 0;
int tmp_num_sample_values = 0; int tmp_num_sample_values = 0;
for (int i = 0; i < num_sample_values; ++i) { for (int i = 0; i < num_sample_values; ++i) {
......
...@@ -721,7 +721,7 @@ void Dataset::SaveBinaryFile(const char* bin_filename) { ...@@ -721,7 +721,7 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
for (int i = 0; i < num_total_features_; ++i) { for (int i = 0; i < num_total_features_; ++i) {
int num_bounds = static_cast<int>(forced_bin_bounds_[i].size()); int num_bounds = static_cast<int>(forced_bin_bounds_[i].size());
writer->Write(&num_bounds, sizeof(int)); writer->Write(&num_bounds, sizeof(int));
for (size_t j = 0; j < forced_bin_bounds_[i].size(); ++j) { for (size_t j = 0; j < forced_bin_bounds_[i].size(); ++j) {
writer->Write(&forced_bin_bounds_[i][j], sizeof(double)); writer->Write(&forced_bin_bounds_[i][j], sizeof(double));
} }
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
*/ */
#include <LightGBM/dataset_loader.h> #include <LightGBM/dataset_loader.h>
#include <LightGBM/json11.hpp>
#include <LightGBM/network.h> #include <LightGBM/network.h>
#include <LightGBM/utils/array_args.h> #include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/log.h> #include <LightGBM/utils/log.h>
...@@ -12,6 +11,8 @@ ...@@ -12,6 +11,8 @@
#include <fstream> #include <fstream>
#include <LightGBM/json11.hpp>
using namespace json11; using namespace json11;
namespace LightGBM { namespace LightGBM {
...@@ -270,7 +271,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, ...@@ -270,7 +271,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
return dataset.release(); return dataset.release();
} }
Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* bin_filename, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) { Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* bin_filename,
int rank, int num_machines, int* num_global_data,
std::vector<data_size_t>* used_data_indices) {
auto dataset = std::unique_ptr<Dataset>(new Dataset()); auto dataset = std::unique_ptr<Dataset>(new Dataset());
auto reader = VirtualFileReader::Make(bin_filename); auto reader = VirtualFileReader::Make(bin_filename);
dataset->data_filename_ = data_filename; dataset->data_filename_ = data_filename;
...@@ -470,13 +473,11 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b ...@@ -470,13 +473,11 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
mem_ptr += sizeof(int); mem_ptr += sizeof(int);
dataset->forced_bin_bounds_[i] = std::vector<double>(); dataset->forced_bin_bounds_[i] = std::vector<double>();
const double* tmp_ptr_forced_bounds = reinterpret_cast<const double*>(mem_ptr); const double* tmp_ptr_forced_bounds = reinterpret_cast<const double*>(mem_ptr);
for (int j = 0; j < num_bounds; ++j) { for (int j = 0; j < num_bounds; ++j) {
double bound = tmp_ptr_forced_bounds[j]; double bound = tmp_ptr_forced_bounds[j];
dataset->forced_bin_bounds_[i].push_back(bound); dataset->forced_bin_bounds_[i].push_back(bound);
} }
mem_ptr += num_bounds * sizeof(double); mem_ptr += num_bounds * sizeof(double);
} }
// read size of meta data // read size of meta data
...@@ -661,7 +662,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, ...@@ -661,7 +662,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
if (config_.max_bin_by_feature.empty()) { if (config_.max_bin_by_feature.empty()) {
bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i], bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i],
total_sample_size, config_.max_bin, config_.min_data_in_bin, total_sample_size, config_.max_bin, config_.min_data_in_bin,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing, filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
forced_bin_bounds[i]); forced_bin_bounds[i]);
} else { } else {
bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i], bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i],
...@@ -821,7 +822,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromMemory(const std::vect ...@@ -821,7 +822,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromMemory(const std::vect
return out; return out;
} }
std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) { std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filename, const Metadata& metadata,
int rank, int num_machines, int* num_global_data,
std::vector<data_size_t>* used_data_indices) {
const data_size_t sample_cnt = static_cast<data_size_t>(config_.bin_construct_sample_cnt); const data_size_t sample_cnt = static_cast<data_size_t>(config_.bin_construct_sample_cnt);
TextReader<data_size_t> text_reader(filename, config_.header); TextReader<data_size_t> text_reader(filename, config_.header);
std::vector<std::string> out_data; std::vector<std::string> out_data;
...@@ -867,7 +870,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen ...@@ -867,7 +870,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
return out_data; return out_data;
} }
void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, const std::vector<std::string>& sample_data, const Parser* parser, Dataset* dataset) { void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
const std::vector<std::string>& sample_data,
const Parser* parser, Dataset* dataset) {
std::vector<std::vector<double>> sample_values; std::vector<std::vector<double>> sample_values;
std::vector<std::vector<int>> sample_indices; std::vector<std::vector<int>> sample_indices;
std::vector<std::pair<int, double>> oneline_features; std::vector<std::pair<int, double>> oneline_features;
...@@ -906,7 +911,8 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, ...@@ -906,7 +911,8 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// get forced split // get forced split
std::string forced_bins_path = config_.forcedbins_filename; std::string forced_bins_path = config_.forcedbins_filename;
std::vector<std::vector<double>> forced_bin_bounds = DatasetLoader::GetForcedBins(forced_bins_path, dataset->num_total_features_, std::vector<std::vector<double>> forced_bin_bounds = DatasetLoader::GetForcedBins(forced_bins_path,
dataset->num_total_features_,
categorical_features_); categorical_features_);
// check the range of label_idx, weight_idx and group_idx // check the range of label_idx, weight_idx and group_idx
...@@ -993,7 +999,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, ...@@ -993,7 +999,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(), bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(),
static_cast<int>(sample_values[start[rank] + i].size()), static_cast<int>(sample_values[start[rank] + i].size()),
sample_data.size(), config_.max_bin, config_.min_data_in_bin, sample_data.size(), config_.max_bin, config_.min_data_in_bin,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing, filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
forced_bin_bounds[i]); forced_bin_bounds[i]);
} else { } else {
bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(), bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(),
...@@ -1149,7 +1155,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat ...@@ -1149,7 +1155,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
} }
/*! \brief Extract local features from file */ /*! \brief Extract local features from file */
void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset) { void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* parser,
const std::vector<data_size_t>& used_data_indices, Dataset* dataset) {
std::vector<double> init_score; std::vector<double> init_score;
if (predict_fun_ != nullptr) { if (predict_fun_ != nullptr) {
init_score = std::vector<double>(dataset->num_data_ * num_class_); init_score = std::vector<double>(dataset->num_data_ * num_class_);
......
...@@ -187,7 +187,7 @@ void Linkers::Construct() { ...@@ -187,7 +187,7 @@ void Linkers::Construct() {
listener_->Listen(incoming_cnt); listener_->Listen(incoming_cnt);
std::thread listen_thread(&Linkers::ListenThread, this, incoming_cnt); std::thread listen_thread(&Linkers::ListenThread, this, incoming_cnt);
const int connect_fail_retry_cnt = 20; const int connect_fail_retry_cnt = 20;
const int connect_fail_retry_first_delay_interval = 200; // 0.2 s const int connect_fail_retry_first_delay_interval = 200; // 0.2 s
const float connect_fail_retry_delay_factor = 1.3f; const float connect_fail_retry_delay_factor = 1.3f;
// start connect // start connect
for (auto it = need_connect.begin(); it != need_connect.end(); ++it) { for (auto it = need_connect.begin(); it != need_connect.end(); ++it) {
......
...@@ -36,7 +36,7 @@ class CostEfficientGradientBoosting { ...@@ -36,7 +36,7 @@ class CostEfficientGradientBoosting {
is_feature_used_in_split_.clear(); is_feature_used_in_split_.clear();
is_feature_used_in_split_.resize(train_data->num_features()); is_feature_used_in_split_.resize(train_data->num_features());
if (!tree_learner_->config_->cegb_penalty_feature_coupled.empty() if (!tree_learner_->config_->cegb_penalty_feature_coupled.empty()
&& tree_learner_->config_->cegb_penalty_feature_coupled.size() != static_cast<size_t>(train_data->num_total_features())) { && tree_learner_->config_->cegb_penalty_feature_coupled.size() != static_cast<size_t>(train_data->num_total_features())) {
Log::Fatal("cegb_penalty_feature_coupled should be the same size as feature number."); Log::Fatal("cegb_penalty_feature_coupled should be the same size as feature number.");
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment