Unverified Commit 9b61166f authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

fixed cpplint errors about spaces and newlines (#2481)

parent 6036e07d
......@@ -71,8 +71,9 @@ namespace LightGBM {
return true;
}
std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_cnt, int min_data_in_bin) {
std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin,
size_t total_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound;
CHECK(max_bin > 0);
if (num_distinct_values <= max_bin) {
......@@ -150,7 +151,9 @@ namespace LightGBM {
}
std::vector<double> FindBinWithPredefinedBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_sample_cnt, int min_data_in_bin, const std::vector<double>& forced_upper_bounds) {
int num_distinct_values, int max_bin,
size_t total_sample_cnt, int min_data_in_bin,
const std::vector<double>& forced_upper_bounds) {
std::vector<double> bin_upper_bound;
// get list of distinct values
......@@ -246,8 +249,8 @@ namespace LightGBM {
return bin_upper_bound;
}
std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values,
int max_bin, size_t total_sample_cnt, int min_data_in_bin) {
std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values,
int max_bin, size_t total_sample_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound;
int left_cnt_data = 0;
int cnt_zero = 0;
......@@ -305,7 +308,8 @@ namespace LightGBM {
}
std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts, int num_distinct_values,
int max_bin, size_t total_sample_cnt, int min_data_in_bin, const std::vector<double>& forced_upper_bounds) {
int max_bin, size_t total_sample_cnt, int min_data_in_bin,
const std::vector<double>& forced_upper_bounds) {
if (forced_upper_bounds.empty()) {
return FindBinWithZeroAsOneBin(distinct_values, counts, num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
} else {
......@@ -315,8 +319,9 @@ namespace LightGBM {
}
void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt,
int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type, bool use_missing, bool zero_as_missing,
const std::vector<double>& forced_upper_bounds) {
int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type,
bool use_missing, bool zero_as_missing,
const std::vector<double>& forced_upper_bounds) {
int na_cnt = 0;
int tmp_num_sample_values = 0;
for (int i = 0; i < num_sample_values; ++i) {
......
......@@ -721,7 +721,7 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
for (int i = 0; i < num_total_features_; ++i) {
int num_bounds = static_cast<int>(forced_bin_bounds_[i].size());
writer->Write(&num_bounds, sizeof(int));
for (size_t j = 0; j < forced_bin_bounds_[i].size(); ++j) {
writer->Write(&forced_bin_bounds_[i][j], sizeof(double));
}
......
......@@ -4,7 +4,6 @@
*/
#include <LightGBM/dataset_loader.h>
#include <LightGBM/json11.hpp>
#include <LightGBM/network.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/log.h>
......@@ -12,6 +11,8 @@
#include <fstream>
#include <LightGBM/json11.hpp>
using namespace json11;
namespace LightGBM {
......@@ -270,7 +271,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
return dataset.release();
}
Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* bin_filename, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) {
Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* bin_filename,
int rank, int num_machines, int* num_global_data,
std::vector<data_size_t>* used_data_indices) {
auto dataset = std::unique_ptr<Dataset>(new Dataset());
auto reader = VirtualFileReader::Make(bin_filename);
dataset->data_filename_ = data_filename;
......@@ -470,13 +473,11 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
mem_ptr += sizeof(int);
dataset->forced_bin_bounds_[i] = std::vector<double>();
const double* tmp_ptr_forced_bounds = reinterpret_cast<const double*>(mem_ptr);
for (int j = 0; j < num_bounds; ++j) {
double bound = tmp_ptr_forced_bounds[j];
dataset->forced_bin_bounds_[i].push_back(bound);
}
mem_ptr += num_bounds * sizeof(double);
}
// read size of meta data
......@@ -661,7 +662,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
if (config_.max_bin_by_feature.empty()) {
bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i],
total_sample_size, config_.max_bin, config_.min_data_in_bin,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
forced_bin_bounds[i]);
} else {
bin_mappers[i]->FindBin(sample_values[start[rank] + i], num_per_col[start[rank] + i],
......@@ -821,7 +822,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromMemory(const std::vect
return out;
}
std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) {
std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filename, const Metadata& metadata,
int rank, int num_machines, int* num_global_data,
std::vector<data_size_t>* used_data_indices) {
const data_size_t sample_cnt = static_cast<data_size_t>(config_.bin_construct_sample_cnt);
TextReader<data_size_t> text_reader(filename, config_.header);
std::vector<std::string> out_data;
......@@ -867,7 +870,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
return out_data;
}
void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, const std::vector<std::string>& sample_data, const Parser* parser, Dataset* dataset) {
void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
const std::vector<std::string>& sample_data,
const Parser* parser, Dataset* dataset) {
std::vector<std::vector<double>> sample_values;
std::vector<std::vector<int>> sample_indices;
std::vector<std::pair<int, double>> oneline_features;
......@@ -906,7 +911,8 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// get forced split
std::string forced_bins_path = config_.forcedbins_filename;
std::vector<std::vector<double>> forced_bin_bounds = DatasetLoader::GetForcedBins(forced_bins_path, dataset->num_total_features_,
std::vector<std::vector<double>> forced_bin_bounds = DatasetLoader::GetForcedBins(forced_bins_path,
dataset->num_total_features_,
categorical_features_);
// check the range of label_idx, weight_idx and group_idx
......@@ -993,7 +999,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(),
static_cast<int>(sample_values[start[rank] + i].size()),
sample_data.size(), config_.max_bin, config_.min_data_in_bin,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
filter_cnt, bin_type, config_.use_missing, config_.zero_as_missing,
forced_bin_bounds[i]);
} else {
bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(),
......@@ -1149,7 +1155,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
}
/*! \brief Extract local features from file */
void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset) {
void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* parser,
const std::vector<data_size_t>& used_data_indices, Dataset* dataset) {
std::vector<double> init_score;
if (predict_fun_ != nullptr) {
init_score = std::vector<double>(dataset->num_data_ * num_class_);
......
......@@ -187,7 +187,7 @@ void Linkers::Construct() {
listener_->Listen(incoming_cnt);
std::thread listen_thread(&Linkers::ListenThread, this, incoming_cnt);
const int connect_fail_retry_cnt = 20;
const int connect_fail_retry_first_delay_interval = 200; // 0.2 s
const int connect_fail_retry_first_delay_interval = 200; // 0.2 s
const float connect_fail_retry_delay_factor = 1.3f;
// start connect
for (auto it = need_connect.begin(); it != need_connect.end(); ++it) {
......
......@@ -36,7 +36,7 @@ class CostEfficientGradientBoosting {
is_feature_used_in_split_.clear();
is_feature_used_in_split_.resize(train_data->num_features());
if (!tree_learner_->config_->cegb_penalty_feature_coupled.empty()
if (!tree_learner_->config_->cegb_penalty_feature_coupled.empty()
&& tree_learner_->config_->cegb_penalty_feature_coupled.size() != static_cast<size_t>(train_data->num_total_features())) {
Log::Fatal("cegb_penalty_feature_coupled should be the same size as feature number.");
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment