Unverified Commit f1a14869 authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

fix many cpp lint errors (#2426)

* fix many cpp lint errors

* indent

* fix bug

* fix more

* fix gpu

* more fixes
parent 4f89cc10
...@@ -59,9 +59,9 @@ int GetConfilctCount(const std::vector<bool>& mark, const int* indices, int num_ ...@@ -59,9 +59,9 @@ int GetConfilctCount(const std::vector<bool>& mark, const int* indices, int num_
} }
return ret; return ret;
} }
// Flags every sample index listed in `indices` as used in the mark vector.
// Uses at() so an out-of-range index throws instead of corrupting memory.
void MarkUsed(std::vector<bool>* mark, const int* indices, int num_indices) {
  const int* const stop = indices + num_indices;
  for (const int* cur = indices; cur != stop; ++cur) {
    mark->at(*cur) = true;
  }
}
...@@ -115,7 +115,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa ...@@ -115,7 +115,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
features_in_group[gid].push_back(fidx); features_in_group[gid].push_back(fidx);
group_conflict_cnt[gid] += cnt; group_conflict_cnt[gid] += cnt;
group_non_zero_cnt[gid] += cur_non_zero_cnt - cnt; group_non_zero_cnt[gid] += cur_non_zero_cnt - cnt;
MarkUsed(conflict_marks[gid], sample_indices[fidx], num_per_col[fidx]); MarkUsed(&conflict_marks[gid], sample_indices[fidx], num_per_col[fidx]);
if (is_use_gpu) { if (is_use_gpu) {
group_num_bin[gid] += bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0); group_num_bin[gid] += bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0);
} }
...@@ -127,7 +127,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa ...@@ -127,7 +127,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
features_in_group.back().push_back(fidx); features_in_group.back().push_back(fidx);
group_conflict_cnt.push_back(0); group_conflict_cnt.push_back(0);
conflict_marks.emplace_back(total_sample_cnt, false); conflict_marks.emplace_back(total_sample_cnt, false);
MarkUsed(conflict_marks.back(), sample_indices[fidx], num_per_col[fidx]); MarkUsed(&(conflict_marks.back()), sample_indices[fidx], num_per_col[fidx]);
group_non_zero_cnt.emplace_back(cur_non_zero_cnt); group_non_zero_cnt.emplace_back(cur_non_zero_cnt);
if (is_use_gpu) { if (is_use_gpu) {
group_num_bin.push_back(1 + bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0)); group_num_bin.push_back(1 + bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0));
...@@ -137,7 +137,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa ...@@ -137,7 +137,7 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
return features_in_group; return features_in_group;
} }
std::vector<std::vector<int>> FastFeatureBundling(std::vector<std::unique_ptr<BinMapper>>& bin_mappers, std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_ptr<BinMapper>>& bin_mappers,
int** sample_indices, int** sample_indices,
const int* num_per_col, const int* num_per_col,
size_t total_sample_cnt, size_t total_sample_cnt,
...@@ -213,17 +213,17 @@ std::vector<std::vector<int>> FastFeatureBundling(std::vector<std::unique_ptr<Bi ...@@ -213,17 +213,17 @@ std::vector<std::vector<int>> FastFeatureBundling(std::vector<std::unique_ptr<Bi
} }
void Dataset::Construct( void Dataset::Construct(
std::vector<std::unique_ptr<BinMapper>>& bin_mappers, std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
int** sample_non_zero_indices, int** sample_non_zero_indices,
const int* num_per_col, const int* num_per_col,
size_t total_sample_cnt, size_t total_sample_cnt,
const Config& io_config) { const Config& io_config) {
num_total_features_ = static_cast<int>(bin_mappers.size()); num_total_features_ = static_cast<int>(bin_mappers->size());
sparse_threshold_ = io_config.sparse_threshold; sparse_threshold_ = io_config.sparse_threshold;
// get num_features // get num_features
std::vector<int> used_features; std::vector<int> used_features;
for (int i = 0; i < static_cast<int>(bin_mappers.size()); ++i) { for (int i = 0; i < static_cast<int>(bin_mappers->size()); ++i) {
if (bin_mappers[i] != nullptr && !bin_mappers[i]->is_trivial()) { if (bin_mappers->at(i) != nullptr && !bin_mappers->at(i)->is_trivial()) {
used_features.emplace_back(i); used_features.emplace_back(i);
} }
} }
...@@ -233,7 +233,7 @@ void Dataset::Construct( ...@@ -233,7 +233,7 @@ void Dataset::Construct(
auto features_in_group = NoGroup(used_features); auto features_in_group = NoGroup(used_features);
if (io_config.enable_bundle && !used_features.empty()) { if (io_config.enable_bundle && !used_features.empty()) {
features_in_group = FastFeatureBundling(bin_mappers, features_in_group = FastFeatureBundling(*bin_mappers,
sample_non_zero_indices, num_per_col, total_sample_cnt, sample_non_zero_indices, num_per_col, total_sample_cnt,
used_features, io_config.max_conflict_rate, used_features, io_config.max_conflict_rate,
num_data_, io_config.min_data_in_leaf, num_data_, io_config.min_data_in_leaf,
...@@ -261,11 +261,11 @@ void Dataset::Construct( ...@@ -261,11 +261,11 @@ void Dataset::Construct(
real_feature_idx_[cur_fidx] = real_fidx; real_feature_idx_[cur_fidx] = real_fidx;
feature2group_[cur_fidx] = i; feature2group_[cur_fidx] = i;
feature2subfeature_[cur_fidx] = j; feature2subfeature_[cur_fidx] = j;
cur_bin_mappers.emplace_back(bin_mappers[real_fidx].release()); cur_bin_mappers.emplace_back(bin_mappers->at(real_fidx).release());
++cur_fidx; ++cur_fidx;
} }
feature_groups_.emplace_back(std::unique_ptr<FeatureGroup>( feature_groups_.emplace_back(std::unique_ptr<FeatureGroup>(
new FeatureGroup(cur_cnt_features, cur_bin_mappers, num_data_, sparse_threshold_, new FeatureGroup(cur_cnt_features, &cur_bin_mappers, num_data_, sparse_threshold_,
io_config.is_enable_sparse))); io_config.is_enable_sparse)));
} }
feature_groups_.shrink_to_fit(); feature_groups_.shrink_to_fit();
...@@ -413,7 +413,7 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset) { ...@@ -413,7 +413,7 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset) {
} }
feature_groups_.emplace_back(new FeatureGroup( feature_groups_.emplace_back(new FeatureGroup(
dataset->feature_groups_[i]->num_feature_, dataset->feature_groups_[i]->num_feature_,
bin_mappers, &bin_mappers,
num_data_, num_data_,
dataset->feature_groups_[i]->is_sparse_)); dataset->feature_groups_[i]->is_sparse_));
} }
...@@ -446,7 +446,7 @@ void Dataset::CreateValid(const Dataset* dataset) { ...@@ -446,7 +446,7 @@ void Dataset::CreateValid(const Dataset* dataset) {
bin_mappers.emplace_back(new BinMapper(*(dataset->FeatureBinMapper(i)))); bin_mappers.emplace_back(new BinMapper(*(dataset->FeatureBinMapper(i))));
feature_groups_.emplace_back(new FeatureGroup( feature_groups_.emplace_back(new FeatureGroup(
1, 1,
bin_mappers, &bin_mappers,
num_data_, num_data_,
sparse_threshold_, sparse_threshold_,
is_enable_sparse)); is_enable_sparse));
...@@ -778,7 +778,7 @@ void Dataset::DumpTextFile(const char* text_filename) { ...@@ -778,7 +778,7 @@ void Dataset::DumpTextFile(const char* text_filename) {
void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data, const data_size_t* data_indices, data_size_t num_data,
int leaf_idx, int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins, std::vector<std::unique_ptr<OrderedBin>>* ordered_bins,
const score_t* gradients, const score_t* hessians, const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians, score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian, bool is_constant_hessian,
...@@ -830,9 +830,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -830,9 +830,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// feature is not used // feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group]; auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_; const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset((void*)(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry)); std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf // construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) { if (ordered_bins->at(group) == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices, data_indices,
...@@ -842,7 +842,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -842,7 +842,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
data_ptr); data_ptr);
} else { } else {
// used ordered bin // used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx, ordered_bins->at(group)->ConstructHistogram(leaf_idx,
gradients, gradients,
hessians, hessians,
data_ptr); data_ptr);
...@@ -859,9 +859,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -859,9 +859,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// feature is not used // feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group]; auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_; const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset((void*)(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry)); std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf // construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) { if (ordered_bins->at(group) == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices, data_indices,
...@@ -870,7 +870,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -870,7 +870,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
data_ptr); data_ptr);
} else { } else {
// used ordered bin // used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx, ordered_bins->at(group)->ConstructHistogram(leaf_idx,
gradients, gradients,
data_ptr); data_ptr);
} }
...@@ -892,9 +892,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -892,9 +892,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// feature is not used // feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group]; auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_; const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset((void*)(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry)); std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf // construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) { if (ordered_bins->at(group) == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
num_data, num_data,
...@@ -903,7 +903,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -903,7 +903,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
data_ptr); data_ptr);
} else { } else {
// used ordered bin // used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx, ordered_bins->at(group)->ConstructHistogram(leaf_idx,
gradients, gradients,
hessians, hessians,
data_ptr); data_ptr);
...@@ -920,9 +920,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -920,9 +920,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// feature is not used // feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group]; auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_; const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset((void*)(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry)); std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf // construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) { if (ordered_bins->at(group) == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
num_data, num_data,
...@@ -930,7 +930,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -930,7 +930,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
data_ptr); data_ptr);
} else { } else {
// used ordered bin // used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx, ordered_bins->at(group)->ConstructHistogram(leaf_idx,
gradients, gradients,
data_ptr); data_ptr);
} }
...@@ -967,32 +967,32 @@ void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hess ...@@ -967,32 +967,32 @@ void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hess
} }
template<typename T> template<typename T>
void PushVector(std::vector<T>& dest, const std::vector<T>& src) { void PushVector(std::vector<T>* dest, const std::vector<T>& src) {
dest.reserve(dest.size() + src.size()); dest->reserve(dest->size() + src.size());
for (auto i : src) { for (auto i : src) {
dest.push_back(i); dest->push_back(i);
} }
} }
template<typename T> template<typename T>
void PushOffset(std::vector<T>& dest, const std::vector<T>& src, const T& offset) { void PushOffset(std::vector<T>* dest, const std::vector<T>& src, const T& offset) {
dest.reserve(dest.size() + src.size()); dest->reserve(dest->size() + src.size());
for (auto i : src) { for (auto i : src) {
dest.push_back(i + offset); dest->push_back(i + offset);
} }
} }
template<typename T> template<typename T>
void PushClearIfEmpty(std::vector<T>& dest, const size_t dest_len, const std::vector<T>& src, const size_t src_len, const T& deflt) { void PushClearIfEmpty(std::vector<T>* dest, const size_t dest_len, const std::vector<T>& src, const size_t src_len, const T& deflt) {
if (!dest.empty() && !src.empty()) { if (!dest->empty() && !src.empty()) {
PushVector(dest, src); PushVector(dest, src);
} else if (!dest.empty() && src.empty()) { } else if (!dest->empty() && src.empty()) {
for (size_t i = 0; i < src_len; ++i) { for (size_t i = 0; i < src_len; ++i) {
dest.push_back(deflt); dest->push_back(deflt);
} }
} else if (dest.empty() && !src.empty()) { } else if (dest->empty() && !src.empty()) {
for (size_t i = 0; i < dest_len; ++i) { for (size_t i = 0; i < dest_len; ++i) {
dest.push_back(deflt); dest->push_back(deflt);
} }
PushVector(dest, src); PushVector(dest, src);
} }
...@@ -1002,9 +1002,9 @@ void Dataset::addFeaturesFrom(Dataset* other) { ...@@ -1002,9 +1002,9 @@ void Dataset::addFeaturesFrom(Dataset* other) {
if (other->num_data_ != num_data_) { if (other->num_data_ != num_data_) {
throw std::runtime_error("Cannot add features from other Dataset with a different number of rows"); throw std::runtime_error("Cannot add features from other Dataset with a different number of rows");
} }
PushVector(feature_names_, other->feature_names_); PushVector(&feature_names_, other->feature_names_);
PushVector(feature2subfeature_, other->feature2subfeature_); PushVector(&feature2subfeature_, other->feature2subfeature_);
PushVector(group_feature_cnt_, other->group_feature_cnt_); PushVector(&group_feature_cnt_, other->group_feature_cnt_);
feature_groups_.reserve(other->feature_groups_.size()); feature_groups_.reserve(other->feature_groups_.size());
for (auto& fg : other->feature_groups_) { for (auto& fg : other->feature_groups_) {
feature_groups_.emplace_back(new FeatureGroup(*fg)); feature_groups_.emplace_back(new FeatureGroup(*fg));
...@@ -1016,17 +1016,17 @@ void Dataset::addFeaturesFrom(Dataset* other) { ...@@ -1016,17 +1016,17 @@ void Dataset::addFeaturesFrom(Dataset* other) {
used_feature_map_.push_back(-1); // Unused feature. used_feature_map_.push_back(-1); // Unused feature.
} }
} }
PushOffset(real_feature_idx_, other->real_feature_idx_, num_total_features_); PushOffset(&real_feature_idx_, other->real_feature_idx_, num_total_features_);
PushOffset(feature2group_, other->feature2group_, num_groups_); PushOffset(&feature2group_, other->feature2group_, num_groups_);
auto bin_offset = group_bin_boundaries_.back(); auto bin_offset = group_bin_boundaries_.back();
// Skip the leading 0 when copying group_bin_boundaries. // Skip the leading 0 when copying group_bin_boundaries.
for (auto i = other->group_bin_boundaries_.begin()+1; i < other->group_bin_boundaries_.end(); ++i) { for (auto i = other->group_bin_boundaries_.begin()+1; i < other->group_bin_boundaries_.end(); ++i) {
group_bin_boundaries_.push_back(*i + bin_offset); group_bin_boundaries_.push_back(*i + bin_offset);
} }
PushOffset(group_feature_start_, other->group_feature_start_, num_features_); PushOffset(&group_feature_start_, other->group_feature_start_, num_features_);
PushClearIfEmpty(monotone_types_, num_total_features_, other->monotone_types_, other->num_total_features_, (int8_t)0); PushClearIfEmpty(&monotone_types_, num_total_features_, other->monotone_types_, other->num_total_features_, (int8_t)0);
PushClearIfEmpty(feature_penalty_, num_total_features_, other->feature_penalty_, other->num_total_features_, 1.0); PushClearIfEmpty(&feature_penalty_, num_total_features_, other->feature_penalty_, other->num_total_features_, 1.0);
num_features_ += other->num_features_; num_features_ += other->num_features_;
num_total_features_ += other->num_total_features_; num_total_features_ += other->num_total_features_;
......
...@@ -190,7 +190,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, const char* initscore ...@@ -190,7 +190,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, const char* initscore
// initialize label // initialize label
dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_);
// extract features // extract features
ExtractFeaturesFromMemory(text_data, parser.get(), dataset.get()); ExtractFeaturesFromMemory(&text_data, parser.get(), dataset.get());
text_data.clear(); text_data.clear();
} else { } else {
// sample data from file // sample data from file
...@@ -242,7 +242,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, ...@@ -242,7 +242,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_);
dataset->CreateValid(train_data); dataset->CreateValid(train_data);
// extract features // extract features
ExtractFeaturesFromMemory(text_data, parser.get(), dataset.get()); ExtractFeaturesFromMemory(&text_data, parser.get(), dataset.get());
text_data.clear(); text_data.clear();
} else { } else {
TextReader<data_size_t> text_reader(filename, config_.header); TextReader<data_size_t> text_reader(filename, config_.header);
...@@ -692,7 +692,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, ...@@ -692,7 +692,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
} }
} }
auto dataset = std::unique_ptr<Dataset>(new Dataset(num_data)); auto dataset = std::unique_ptr<Dataset>(new Dataset(num_data));
dataset->Construct(bin_mappers, sample_indices, num_per_col, total_sample_size, config_); dataset->Construct(&bin_mappers, sample_indices, num_per_col, total_sample_size, config_);
dataset->set_feature_names(feature_names_); dataset->set_feature_names(feature_names_);
return dataset.release(); return dataset.release();
} }
...@@ -798,7 +798,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen ...@@ -798,7 +798,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
TextReader<data_size_t> text_reader(filename, config_.header); TextReader<data_size_t> text_reader(filename, config_.header);
std::vector<std::string> out_data; std::vector<std::string> out_data;
if (num_machines == 1 || config_.pre_partition) { if (num_machines == 1 || config_.pre_partition) {
*num_global_data = static_cast<data_size_t>(text_reader.SampleFromFile(random_, sample_cnt, &out_data)); *num_global_data = static_cast<data_size_t>(text_reader.SampleFromFile(&random_, sample_cnt, &out_data));
} else { // need partition data } else { // need partition data
// get query data // get query data
const data_size_t* query_boundaries = metadata.query_boundaries(); const data_size_t* query_boundaries = metadata.query_boundaries();
...@@ -811,7 +811,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen ...@@ -811,7 +811,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
} else { } else {
return false; return false;
} }
}, used_data_indices, random_, sample_cnt, &out_data); }, used_data_indices, &random_, sample_cnt, &out_data);
} else { } else {
// if contain query file, minimal sample unit is one query // if contain query file, minimal sample unit is one query
data_size_t num_queries = metadata.num_queries(); data_size_t num_queries = metadata.num_queries();
...@@ -833,7 +833,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen ...@@ -833,7 +833,7 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
++qid; ++qid;
} }
return is_query_used; return is_query_used;
}, used_data_indices, random_, sample_cnt, &out_data); }, used_data_indices, &random_, sample_cnt, &out_data);
} }
} }
return out_data; return out_data;
...@@ -1018,12 +1018,12 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, ...@@ -1018,12 +1018,12 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
} }
} }
sample_values.clear(); sample_values.clear();
dataset->Construct(bin_mappers, Common::Vector2Ptr<int>(sample_indices).data(), dataset->Construct(&bin_mappers, Common::Vector2Ptr<int>(&sample_indices).data(),
Common::VectorSize<int>(sample_indices).data(), sample_data.size(), config_); Common::VectorSize<int>(sample_indices).data(), sample_data.size(), config_);
} }
/*! \brief Extract local features from memory */ /*! \brief Extract local features from memory */
void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_data, const Parser* parser, Dataset* dataset) { void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_data, const Parser* parser, Dataset* dataset) {
std::vector<std::pair<int, double>> oneline_features; std::vector<std::pair<int, double>> oneline_features;
double tmp_label = 0.0f; double tmp_label = 0.0f;
if (predict_fun_ == nullptr) { if (predict_fun_ == nullptr) {
...@@ -1035,11 +1035,11 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat ...@@ -1035,11 +1035,11 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
oneline_features.clear(); oneline_features.clear();
// parser // parser
parser->ParseOneLine(text_data[i].c_str(), &oneline_features, &tmp_label); parser->ParseOneLine(text_data->at(i).c_str(), &oneline_features, &tmp_label);
// set label // set label
dataset->metadata_.SetLabelAt(i, static_cast<label_t>(tmp_label)); dataset->metadata_.SetLabelAt(i, static_cast<label_t>(tmp_label));
// free processed line: // free processed line:
text_data[i].clear(); text_data->at(i).clear();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
// text_reader_->Lines()[i].shrink_to_fit(); // text_reader_->Lines()[i].shrink_to_fit();
// push data // push data
...@@ -1072,7 +1072,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat ...@@ -1072,7 +1072,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
oneline_features.clear(); oneline_features.clear();
// parser // parser
parser->ParseOneLine(text_data[i].c_str(), &oneline_features, &tmp_label); parser->ParseOneLine(text_data->at(i).c_str(), &oneline_features, &tmp_label);
// set initial score // set initial score
std::vector<double> oneline_init_score(num_class_); std::vector<double> oneline_init_score(num_class_);
predict_fun_(oneline_features, oneline_init_score.data()); predict_fun_(oneline_features, oneline_init_score.data());
...@@ -1110,7 +1110,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat ...@@ -1110,7 +1110,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
} }
dataset->FinishLoad(); dataset->FinishLoad();
// text data can be free after loaded feature values // text data can be free after loaded feature values
text_data.clear(); text_data->clear();
} }
/*! \brief Extract local features from file */ /*! \brief Extract local features from file */
......
...@@ -48,7 +48,7 @@ template <typename VAL_T> ...@@ -48,7 +48,7 @@ template <typename VAL_T>
class DenseBin: public Bin { class DenseBin: public Bin {
public: public:
friend DenseBinIterator<VAL_T>; friend DenseBinIterator<VAL_T>;
DenseBin(data_size_t num_data) explicit DenseBin(data_size_t num_data)
: num_data_(num_data), data_(num_data_, static_cast<VAL_T>(0)) { : num_data_(num_data), data_(num_data_, static_cast<VAL_T>(0)) {
} }
...@@ -192,7 +192,7 @@ class DenseBin: public Bin { ...@@ -192,7 +192,7 @@ class DenseBin: public Bin {
} }
} }
virtual data_size_t Split( data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data, uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
...@@ -253,7 +253,7 @@ class DenseBin: public Bin { ...@@ -253,7 +253,7 @@ class DenseBin: public Bin {
return lte_count; return lte_count;
} }
virtual data_size_t SplitCategorical( data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data, const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
......
...@@ -42,7 +42,7 @@ class Dense4bitsBinIterator : public BinIterator { ...@@ -42,7 +42,7 @@ class Dense4bitsBinIterator : public BinIterator {
class Dense4bitsBin : public Bin { class Dense4bitsBin : public Bin {
public: public:
friend Dense4bitsBinIterator; friend Dense4bitsBinIterator;
Dense4bitsBin(data_size_t num_data) explicit Dense4bitsBin(data_size_t num_data)
: num_data_(num_data) { : num_data_(num_data) {
int len = (num_data_ + 1) / 2; int len = (num_data_ + 1) / 2;
data_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0)); data_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
...@@ -215,7 +215,7 @@ class Dense4bitsBin : public Bin { ...@@ -215,7 +215,7 @@ class Dense4bitsBin : public Bin {
} }
} }
virtual data_size_t Split( data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data, uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
...@@ -276,7 +276,7 @@ class Dense4bitsBin : public Bin { ...@@ -276,7 +276,7 @@ class Dense4bitsBin : public Bin {
return lte_count; return lte_count;
} }
virtual data_size_t SplitCategorical( data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data, const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
......
...@@ -54,7 +54,7 @@ struct LocalFile : VirtualFileReader, VirtualFileWriter { ...@@ -54,7 +54,7 @@ struct LocalFile : VirtualFileReader, VirtualFileWriter {
const std::string mode_; const std::string mode_;
}; };
const std::string kHdfsProto = "hdfs://"; const char* kHdfsProto = "hdfs://";
#ifdef USE_HDFS #ifdef USE_HDFS
struct HDFSFile : VirtualFileReader, VirtualFileWriter { struct HDFSFile : VirtualFileReader, VirtualFileWriter {
......
...@@ -36,7 +36,7 @@ class OrderedSparseBin: public OrderedBin { ...@@ -36,7 +36,7 @@ class OrderedSparseBin: public OrderedBin {
SparsePair() : ridx(0), bin(0) {} SparsePair() : ridx(0), bin(0) {}
}; };
OrderedSparseBin(const SparseBin<VAL_T>* bin_data) explicit OrderedSparseBin(const SparseBin<VAL_T>* bin_data)
:bin_data_(bin_data) { :bin_data_(bin_data) {
data_size_t cur_pos = 0; data_size_t cur_pos = 0;
data_size_t i_delta = -1; data_size_t i_delta = -1;
......
...@@ -27,13 +27,13 @@ void GetStatistic(const char* str, int* comma_cnt, int* tab_cnt, int* colon_cnt) ...@@ -27,13 +27,13 @@ void GetStatistic(const char* str, int* comma_cnt, int* tab_cnt, int* colon_cnt)
} }
} }
int GetLabelIdxForLibsvm(std::string& str, int num_features, int label_idx) { int GetLabelIdxForLibsvm(const std::string& str, int num_features, int label_idx) {
if (num_features <= 0) { if (num_features <= 0) {
return label_idx; return label_idx;
} }
str = Common::Trim(str); auto str2 = Common::Trim(str);
auto pos_space = str.find_first_of(" \f\n\r\t\v"); auto pos_space = str2.find_first_of(" \f\n\r\t\v");
auto pos_colon = str.find_first_of(":"); auto pos_colon = str2.find_first_of(":");
if (pos_space == std::string::npos || pos_space < pos_colon) { if (pos_space == std::string::npos || pos_space < pos_colon) {
return label_idx; return label_idx;
} else { } else {
...@@ -41,12 +41,12 @@ int GetLabelIdxForLibsvm(std::string& str, int num_features, int label_idx) { ...@@ -41,12 +41,12 @@ int GetLabelIdxForLibsvm(std::string& str, int num_features, int label_idx) {
} }
} }
int GetLabelIdxForTSV(std::string& str, int num_features, int label_idx) { int GetLabelIdxForTSV(const std::string& str, int num_features, int label_idx) {
if (num_features <= 0) { if (num_features <= 0) {
return label_idx; return label_idx;
} }
str = Common::Trim(str); auto str2 = Common::Trim(str);
auto tokens = Common::Split(str.c_str(), '\t'); auto tokens = Common::Split(str2.c_str(), '\t');
if (static_cast<int>(tokens.size()) == num_features) { if (static_cast<int>(tokens.size()) == num_features) {
return -1; return -1;
} else { } else {
...@@ -54,12 +54,12 @@ int GetLabelIdxForTSV(std::string& str, int num_features, int label_idx) { ...@@ -54,12 +54,12 @@ int GetLabelIdxForTSV(std::string& str, int num_features, int label_idx) {
} }
} }
int GetLabelIdxForCSV(std::string& str, int num_features, int label_idx) { int GetLabelIdxForCSV(const std::string& str, int num_features, int label_idx) {
if (num_features <= 0) { if (num_features <= 0) {
return label_idx; return label_idx;
} }
str = Common::Trim(str); auto str2 = Common::Trim(str);
auto tokens = Common::Split(str.c_str(), ','); auto tokens = Common::Split(str2.c_str(), ',');
if (static_cast<int>(tokens.size()) == num_features) { if (static_cast<int>(tokens.size()) == num_features) {
return -1; return -1;
} else { } else {
...@@ -74,18 +74,18 @@ enum DataType { ...@@ -74,18 +74,18 @@ enum DataType {
LIBSVM LIBSVM
}; };
void getline(std::stringstream& ss, std::string& line, const VirtualFileReader* reader, std::vector<char>& buffer, size_t buffer_size) { void GetLine(std::stringstream* ss, std::string* line, const VirtualFileReader* reader, std::vector<char>* buffer, size_t buffer_size) {
std::getline(ss, line); std::getline(*ss, *line);
while (ss.eof()) { while (ss->eof()) {
size_t read_len = reader->Read(buffer.data(), buffer_size); size_t read_len = reader->Read(buffer->data(), buffer_size);
if (read_len <= 0) { if (read_len <= 0) {
break; break;
} }
ss.clear(); ss->clear();
ss.str(std::string(buffer.data(), read_len)); ss->str(std::string(buffer->data(), read_len));
std::string tmp; std::string tmp;
std::getline(ss, tmp); std::getline(*ss, tmp);
line += tmp; *line += tmp;
} }
} }
...@@ -105,16 +105,16 @@ Parser* Parser::CreateParser(const char* filename, bool header, int num_features ...@@ -105,16 +105,16 @@ Parser* Parser::CreateParser(const char* filename, bool header, int num_features
std::stringstream tmp_file(std::string(buffer.data(), read_len)); std::stringstream tmp_file(std::string(buffer.data(), read_len));
if (header) { if (header) {
if (!tmp_file.eof()) { if (!tmp_file.eof()) {
getline(tmp_file, line1, reader.get(), buffer, buffer_size); GetLine(&tmp_file, &line1, reader.get(), &buffer, buffer_size);
} }
} }
if (!tmp_file.eof()) { if (!tmp_file.eof()) {
getline(tmp_file, line1, reader.get(), buffer, buffer_size); GetLine(&tmp_file, &line1, reader.get(), &buffer, buffer_size);
} else { } else {
Log::Fatal("Data file %s should have at least one line", filename); Log::Fatal("Data file %s should have at least one line", filename);
} }
if (!tmp_file.eof()) { if (!tmp_file.eof()) {
getline(tmp_file, line2, reader.get(), buffer, buffer_size); GetLine(&tmp_file, &line2, reader.get(), &buffer, buffer_size);
} else { } else {
Log::Warning("Data file %s only has one line", filename); Log::Warning("Data file %s only has one line", filename);
} }
......
...@@ -75,7 +75,7 @@ class SparseBin: public Bin { ...@@ -75,7 +75,7 @@ class SparseBin: public Bin {
friend class SparseBinIterator<VAL_T>; friend class SparseBinIterator<VAL_T>;
friend class OrderedSparseBin<VAL_T>; friend class OrderedSparseBin<VAL_T>;
SparseBin(data_size_t num_data) explicit SparseBin(data_size_t num_data)
: num_data_(num_data) { : num_data_(num_data) {
int num_threads = 1; int num_threads = 1;
#pragma omp parallel #pragma omp parallel
...@@ -145,7 +145,7 @@ class SparseBin: public Bin { ...@@ -145,7 +145,7 @@ class SparseBin: public Bin {
} }
} }
virtual data_size_t Split( data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, MissingType missing_type, bool default_left,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data, uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
...@@ -208,7 +208,7 @@ class SparseBin: public Bin { ...@@ -208,7 +208,7 @@ class SparseBin: public Bin {
return lte_count; return lte_count;
} }
virtual data_size_t SplitCategorical( data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data, const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t* lte_indices, data_size_t* gt_indices) const override {
......
...@@ -72,7 +72,7 @@ class MulticlassMetric: public Metric { ...@@ -72,7 +72,7 @@ class MulticlassMetric: public Metric {
std::vector<double> rec(num_pred_per_row); std::vector<double> rec(num_pred_per_row);
objective->ConvertOutput(raw_score.data(), rec.data()); objective->ConvertOutput(raw_score.data(), rec.data());
// add loss // add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec, config_); sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_);
} }
} else { } else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss) #pragma omp parallel for schedule(static) reduction(+:sum_loss)
...@@ -85,7 +85,7 @@ class MulticlassMetric: public Metric { ...@@ -85,7 +85,7 @@ class MulticlassMetric: public Metric {
std::vector<double> rec(num_pred_per_row); std::vector<double> rec(num_pred_per_row);
objective->ConvertOutput(raw_score.data(), rec.data()); objective->ConvertOutput(raw_score.data(), rec.data());
// add loss // add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec, config_) * weights_[i]; sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_) * weights_[i];
} }
} }
} else { } else {
...@@ -98,7 +98,7 @@ class MulticlassMetric: public Metric { ...@@ -98,7 +98,7 @@ class MulticlassMetric: public Metric {
rec[k] = static_cast<double>(score[idx]); rec[k] = static_cast<double>(score[idx]);
} }
// add loss // add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec, config_); sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_);
} }
} else { } else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss) #pragma omp parallel for schedule(static) reduction(+:sum_loss)
...@@ -109,7 +109,7 @@ class MulticlassMetric: public Metric { ...@@ -109,7 +109,7 @@ class MulticlassMetric: public Metric {
rec[k] = static_cast<double>(score[idx]); rec[k] = static_cast<double>(score[idx]);
} }
// add loss // add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec, config_) * weights_[i]; sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], &rec, config_) * weights_[i];
} }
} }
} }
...@@ -138,11 +138,11 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> { ...@@ -138,11 +138,11 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
public: public:
explicit MultiErrorMetric(const Config& config) :MulticlassMetric<MultiErrorMetric>(config) {} explicit MultiErrorMetric(const Config& config) :MulticlassMetric<MultiErrorMetric>(config) {}
inline static double LossOnPoint(label_t label, std::vector<double>& score, const Config& config) { inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config& config) {
size_t k = static_cast<size_t>(label); size_t k = static_cast<size_t>(label);
int num_larger = 0; int num_larger = 0;
for (size_t i = 0; i < score.size(); ++i) { for (size_t i = 0; i < score->size(); ++i) {
if (score[i] >= score[k]) ++num_larger; if (score->at(i) >= score->at(k)) ++num_larger;
if (num_larger > config.multi_error_top_k) return 1.0f; if (num_larger > config.multi_error_top_k) return 1.0f;
} }
return 0.0f; return 0.0f;
...@@ -162,10 +162,10 @@ class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetr ...@@ -162,10 +162,10 @@ class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetr
public: public:
explicit MultiSoftmaxLoglossMetric(const Config& config) :MulticlassMetric<MultiSoftmaxLoglossMetric>(config) {} explicit MultiSoftmaxLoglossMetric(const Config& config) :MulticlassMetric<MultiSoftmaxLoglossMetric>(config) {}
inline static double LossOnPoint(label_t label, std::vector<double>& score, const Config&) { inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config&) {
size_t k = static_cast<size_t>(label); size_t k = static_cast<size_t>(label);
if (score[k] > kEpsilon) { if (score->at(k) > kEpsilon) {
return static_cast<double>(-std::log(score[k])); return static_cast<double>(-std::log(score->at(k)));
} else { } else {
return -std::log(kEpsilon); return -std::log(kEpsilon);
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <LightGBM/config.h> #include <LightGBM/config.h>
#include <LightGBM/meta.h> #include <LightGBM/meta.h>
#include <LightGBM/network.h> #include <LightGBM/network.h>
#include <LightGBM/utils/common.h>
#include <string> #include <string>
#include <algorithm> #include <algorithm>
...@@ -19,7 +20,6 @@ ...@@ -19,7 +20,6 @@
#ifdef USE_SOCKET #ifdef USE_SOCKET
#include "socket_wrapper.hpp" #include "socket_wrapper.hpp"
#include <LightGBM/utils/common.h>
#endif #endif
#ifdef USE_MPI #ifdef USE_MPI
......
...@@ -6,10 +6,14 @@ ...@@ -6,10 +6,14 @@
#define LIGHTGBM_NETWORK_SOCKET_WRAPPER_HPP_ #define LIGHTGBM_NETWORK_SOCKET_WRAPPER_HPP_
#ifdef USE_SOCKET #ifdef USE_SOCKET
#include <LightGBM/utils/log.h>
#if defined(_WIN32) #if defined(_WIN32)
#ifdef _MSC_VER #ifdef _MSC_VER
#define NOMINMAX #define NOMINMAX
#endif #endif
#include <winsock2.h> #include <winsock2.h>
#include <ws2tcpip.h> #include <ws2tcpip.h>
#include <iphlpapi.h> #include <iphlpapi.h>
...@@ -18,7 +22,6 @@ ...@@ -18,7 +22,6 @@
#include <fcntl.h> #include <fcntl.h>
#include <netdb.h> #include <netdb.h>
#include <cerrno>
#include <unistd.h> #include <unistd.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <netinet/in.h> #include <netinet/in.h>
...@@ -30,10 +33,9 @@ ...@@ -30,10 +33,9 @@
#endif #endif
#include <LightGBM/utils/log.h> #include <cerrno>
#include <string>
#include <cstdlib> #include <cstdlib>
#include <string>
#include <unordered_set> #include <unordered_set>
#ifdef _MSC_VER #ifdef _MSC_VER
......
...@@ -105,7 +105,7 @@ class LambdarankNDCG: public ObjectiveFunction { ...@@ -105,7 +105,7 @@ class LambdarankNDCG: public ObjectiveFunction {
} }
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; }); [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score // get best and worst score
const double best_score = score[sorted_idx[0]]; const double best_score = score[sorted_idx[0]];
data_size_t worst_idx = cnt - 1; data_size_t worst_idx = cnt - 1;
if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) { if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
...@@ -143,7 +143,7 @@ class LambdarankNDCG: public ObjectiveFunction { ...@@ -143,7 +143,7 @@ class LambdarankNDCG: public ObjectiveFunction {
const double paired_discount = fabs(high_discount - low_discount); const double paired_discount = fabs(high_discount - low_discount);
// get delta NDCG // get delta NDCG
double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
// regular the delta_pair_NDCG by score distance // regular the delta_pair_NDCG by score distance
if (norm_ && high_label != low_label && best_score != worst_score) { if (norm_ && high_label != low_label && best_score != worst_score) {
delta_pair_NDCG /= (0.01f + fabs(delta_score)); delta_pair_NDCG /= (0.01f + fabs(delta_score));
} }
......
...@@ -138,7 +138,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() { ...@@ -138,7 +138,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
} }
}); });
// copy back // copy back
std::memcpy((void*)&data, output_buffer_.data(), size); std::memcpy(reinterpret_cast<void*>(&data), output_buffer_.data(), size);
// set global sumup info // set global sumup info
this->smaller_leaf_splits_->Init(std::get<1>(data), std::get<2>(data)); this->smaller_leaf_splits_->Init(std::get<1>(data), std::get<2>(data));
// init global data count in leaf // init global data count in leaf
......
...@@ -184,8 +184,8 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur ...@@ -184,8 +184,8 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
// copy the results asynchronously. Size depends on if double precision is used // copy the results asynchronously. Size depends on if double precision is used
size_t output_size = num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_; size_t output_size = num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
boost::compute::event histogram_wait_event; boost::compute::event histogram_wait_event;
host_histogram_outputs_ = (void*)queue_.enqueue_map_buffer_async(device_histogram_outputs_, boost::compute::command_queue::map_read, host_histogram_outputs_ = reinterpret_cast<void*>(queue_.enqueue_map_buffer_async(device_histogram_outputs_, boost::compute::command_queue::map_read,
0, output_size, histogram_wait_event, kernel_wait_obj_); 0, output_size, histogram_wait_event, kernel_wait_obj_));
// we will wait for this object in WaitAndGetHistograms // we will wait for this object in WaitAndGetHistograms
histograms_wait_obj_ = boost::compute::wait_list(histogram_wait_event); histograms_wait_obj_ = boost::compute::wait_list(histogram_wait_event);
} }
...@@ -736,7 +736,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) { ...@@ -736,7 +736,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
} }
Tree* GPUTreeLearner::Train(const score_t* gradients, const score_t *hessians, Tree* GPUTreeLearner::Train(const score_t* gradients, const score_t *hessians,
bool is_constant_hessian, Json& forced_split_json) { bool is_constant_hessian, const Json& forced_split_json) {
// check if we need to recompile the GPU kernel (is_constant_hessian changed) // check if we need to recompile the GPU kernel (is_constant_hessian changed)
// this should rarely occur // this should rarely occur
if (is_constant_hessian != is_constant_hessian_) { if (is_constant_hessian != is_constant_hessian_) {
...@@ -977,7 +977,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u ...@@ -977,7 +977,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
train_data_->ConstructHistograms(is_sparse_feature_used, train_data_->ConstructHistograms(is_sparse_feature_used,
nullptr, smaller_leaf_splits_->num_data_in_leaf(), nullptr, smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(), smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_, &ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_, ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_smaller_leaf_hist_data); ptr_smaller_leaf_hist_data);
// wait for GPU to finish, only if GPU is actually used // wait for GPU to finish, only if GPU is actually used
...@@ -1030,7 +1030,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u ...@@ -1030,7 +1030,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
train_data_->ConstructHistograms(is_sparse_feature_used, train_data_->ConstructHistograms(is_sparse_feature_used,
nullptr, larger_leaf_splits_->num_data_in_leaf(), nullptr, larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(), larger_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_, &ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_, ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_larger_leaf_hist_data); ptr_larger_leaf_hist_data);
// wait for GPU to finish, only if GPU is actually used // wait for GPU to finish, only if GPU is actually used
......
...@@ -48,7 +48,7 @@ class GPUTreeLearner: public SerialTreeLearner { ...@@ -48,7 +48,7 @@ class GPUTreeLearner: public SerialTreeLearner {
void Init(const Dataset* train_data, bool is_constant_hessian) override; void Init(const Dataset* train_data, bool is_constant_hessian) override;
void ResetTrainingData(const Dataset* train_data) override; void ResetTrainingData(const Dataset* train_data) override;
Tree* Train(const score_t* gradients, const score_t *hessians, Tree* Train(const score_t* gradients, const score_t *hessians,
bool is_constant_hessian, Json& forced_split_json) override; bool is_constant_hessian, const Json& forced_split_json) override;
void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override { void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override {
SerialTreeLearner::SetBaggingData(used_indices, num_data); SerialTreeLearner::SetBaggingData(used_indices, num_data);
......
...@@ -19,7 +19,7 @@ namespace LightGBM { ...@@ -19,7 +19,7 @@ namespace LightGBM {
*/ */
class LeafSplits { class LeafSplits {
public: public:
LeafSplits(data_size_t num_data) explicit LeafSplits(data_size_t num_data)
:num_data_in_leaf_(num_data), num_data_(num_data), :num_data_in_leaf_(num_data), num_data_(num_data),
data_indices_(nullptr) { data_indices_(nullptr) {
} }
......
...@@ -170,7 +170,7 @@ void SerialTreeLearner::ResetConfig(const Config* config) { ...@@ -170,7 +170,7 @@ void SerialTreeLearner::ResetConfig(const Config* config) {
histogram_pool_.ResetConfig(config_); histogram_pool_.ResetConfig(config_);
} }
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian, Json& forced_split_json) { Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian, const Json& forced_split_json) {
gradients_ = gradients; gradients_ = gradients;
hessians_ = hessians; hessians_ = hessians;
is_constant_hessian_ = is_constant_hessian; is_constant_hessian_ = is_constant_hessian;
...@@ -290,7 +290,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) { ...@@ -290,7 +290,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
CHECK(inner_feature_index >= 0); CHECK(inner_feature_index >= 0);
ret[inner_feature_index] = 1; ret[inner_feature_index] = 1;
} }
} else if(used_feature_indices_.size() <= 0) { } else if (used_feature_indices_.size() <= 0) {
int used_feature_cnt = static_cast<int>(std::round(valid_feature_indices_.size() * config_->feature_fraction_bynode)); int used_feature_cnt = static_cast<int>(std::round(valid_feature_indices_.size() * config_->feature_fraction_bynode));
used_feature_cnt = std::max(used_feature_cnt, min_used_features); used_feature_cnt = std::max(used_feature_cnt, min_used_features);
auto sampled_indices = random_.Sample(static_cast<int>(valid_feature_indices_.size()), used_feature_cnt); auto sampled_indices = random_.Sample(static_cast<int>(valid_feature_indices_.size()), used_feature_cnt);
...@@ -502,7 +502,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur ...@@ -502,7 +502,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
train_data_->ConstructHistograms(is_feature_used, train_data_->ConstructHistograms(is_feature_used,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(), smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(), smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_, &ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_, ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_smaller_leaf_hist_data); ptr_smaller_leaf_hist_data);
...@@ -512,7 +512,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur ...@@ -512,7 +512,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
train_data_->ConstructHistograms(is_feature_used, train_data_->ConstructHistograms(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(), larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(), larger_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_, &ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_, ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_larger_leaf_hist_data); ptr_larger_leaf_hist_data);
} }
...@@ -636,7 +636,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& ...@@ -636,7 +636,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
#endif #endif
} }
int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int* left_leaf, int32_t SerialTreeLearner::ForceSplits(Tree* tree, const Json& forced_split_json, int* left_leaf,
int* right_leaf, int *cur_depth, int* right_leaf, int *cur_depth,
bool *aborted_last_force_split) { bool *aborted_last_force_split) {
int32_t result_count = 0; int32_t result_count = 0;
...@@ -819,7 +819,7 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri ...@@ -819,7 +819,7 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri
auto tmp_idx = data_partition_->GetIndexOnLeaf(best_leaf, &cnt_leaf_data); auto tmp_idx = data_partition_->GetIndexOnLeaf(best_leaf, &cnt_leaf_data);
for (data_size_t i_input = 0; i_input < cnt_leaf_data; ++i_input) { for (data_size_t i_input = 0; i_input < cnt_leaf_data; ++i_input) {
int real_idx = tmp_idx[i_input]; int real_idx = tmp_idx[i_input];
Common::InsertBitset(feature_used_in_data, train_data_->num_data() * inner_feature_index + real_idx); Common::InsertBitset(&feature_used_in_data, train_data_->num_data() * inner_feature_index + real_idx);
} }
} }
...@@ -932,8 +932,8 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj ...@@ -932,8 +932,8 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
for (int i = 0; i < tree->num_leaves(); ++i) { for (int i = 0; i < tree->num_leaves(); ++i) {
outputs[i] = static_cast<double>(tree->LeafOutput(i)); outputs[i] = static_cast<double>(tree->LeafOutput(i));
} }
Network::GlobalSum(outputs); outputs = Network::GlobalSum(&outputs);
Network::GlobalSum(n_nozeroworker_perleaf); n_nozeroworker_perleaf = Network::GlobalSum(&n_nozeroworker_perleaf);
for (int i = 0; i < tree->num_leaves(); ++i) { for (int i = 0; i < tree->num_leaves(); ++i) {
tree->SetLeafOutput(i, outputs[i] / n_nozeroworker_perleaf[i]); tree->SetLeafOutput(i, outputs[i] / n_nozeroworker_perleaf[i]);
} }
......
...@@ -49,7 +49,7 @@ class SerialTreeLearner: public TreeLearner { ...@@ -49,7 +49,7 @@ class SerialTreeLearner: public TreeLearner {
void ResetConfig(const Config* config) override; void ResetConfig(const Config* config) override;
Tree* Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian, Tree* Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian,
Json& forced_split_json) override; const Json& forced_split_json) override;
Tree* FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const override; Tree* FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const override;
...@@ -78,7 +78,6 @@ class SerialTreeLearner: public TreeLearner { ...@@ -78,7 +78,6 @@ class SerialTreeLearner: public TreeLearner {
data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const override; data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const override;
protected: protected:
virtual std::vector<int8_t> GetUsedFeatures(bool is_tree_level); virtual std::vector<int8_t> GetUsedFeatures(bool is_tree_level);
/*! /*!
* \brief Some initial works before training * \brief Some initial works before training
...@@ -106,7 +105,7 @@ class SerialTreeLearner: public TreeLearner { ...@@ -106,7 +105,7 @@ class SerialTreeLearner: public TreeLearner {
virtual void Split(Tree* tree, int best_leaf, int* left_leaf, int* right_leaf); virtual void Split(Tree* tree, int best_leaf, int* left_leaf, int* right_leaf);
/* Force splits with forced_split_json dict and then return num splits forced.*/ /* Force splits with forced_split_json dict and then return num splits forced.*/
virtual int32_t ForceSplits(Tree* tree, Json& forced_split_json, int* left_leaf, virtual int32_t ForceSplits(Tree* tree, const Json& forced_split_json, int* left_leaf,
int* right_leaf, int* cur_depth, int* right_leaf, int* cur_depth,
bool *aborted_last_force_split); bool *aborted_last_force_split);
......
...@@ -135,7 +135,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() { ...@@ -135,7 +135,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
} }
}); });
std::memcpy((void*)&data, output_buffer_.data(), size); std::memcpy(reinterpret_cast<void*>(&data), output_buffer_.data(), size);
// set global sumup info // set global sumup info
smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data)); smaller_leaf_splits_global_->Init(std::get<1>(data), std::get<2>(data));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment