Commit 062bfa79 authored by Guolin Ke, committed by GitHub
Browse files

Revert "[WIP]faster histogram sum up" (#422)

* Revert "python-package: support valid_names in scikit-learn API (#420)"

This reverts commit de39dbcf.

* Revert "faster histogram sum up (#418)"

This reverts commit 98c7c2a3.
parent de39dbcf
...@@ -71,8 +71,8 @@ public: ...@@ -71,8 +71,8 @@ public:
ConstructSigmoidTable(); ConstructSigmoidTable();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
#pragma omp parallel for schedule(guided) #pragma omp parallel for schedule(guided)
for (data_size_t i = 0; i < num_queries_; ++i) { for (data_size_t i = 0; i < num_queries_; ++i) {
GetGradientsForOneQuery(score, gradients, hessians, i); GetGradientsForOneQuery(score, gradients, hessians, i);
...@@ -80,7 +80,7 @@ public: ...@@ -80,7 +80,7 @@ public:
} }
inline void GetGradientsForOneQuery(const double* score, inline void GetGradientsForOneQuery(const double* score,
float* lambdas, float* hessians, data_size_t query_id) const { score_t* lambdas, score_t* hessians, data_size_t query_id) const {
// get doc boundary for current query // get doc boundary for current query
const data_size_t start = query_boundaries_[query_id]; const data_size_t start = query_boundaries_[query_id];
const data_size_t cnt = const data_size_t cnt =
...@@ -153,12 +153,12 @@ public: ...@@ -153,12 +153,12 @@ public:
p_hessian *= 2 * delta_pair_NDCG; p_hessian *= 2 * delta_pair_NDCG;
high_sum_lambda += p_lambda; high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian; high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<float>(p_lambda); lambdas[low] -= static_cast<score_t>(p_lambda);
hessians[low] += static_cast<float>(p_hessian); hessians[low] += static_cast<score_t>(p_hessian);
} }
// update // update
lambdas[high] += static_cast<float>(high_sum_lambda); lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<float>(high_sum_hessian); hessians[high] += static_cast<score_t>(high_sum_hessian);
} }
// if need weights // if need weights
if (weights_ != nullptr) { if (weights_ != nullptr) {
......
...@@ -26,18 +26,18 @@ public: ...@@ -26,18 +26,18 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<float>(score[i] - label_[i]); gradients[i] = static_cast<score_t>(score[i] - label_[i]);
hessians[i] = 1.0f; hessians[i] = 1.0f;
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<float>(score[i] - label_[i]) * weights_[i]; gradients[i] = static_cast<score_t>(score[i] - label_[i]) * weights_[i];
hessians[i] = weights_[i]; hessians[i] = weights_[i];
} }
} }
...@@ -93,8 +93,8 @@ public: ...@@ -93,8 +93,8 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
...@@ -104,7 +104,7 @@ public: ...@@ -104,7 +104,7 @@ public:
} else { } else {
gradients[i] = -1.0f; gradients[i] = -1.0f;
} }
hessians[i] = static_cast<float>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_)); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -115,7 +115,7 @@ public: ...@@ -115,7 +115,7 @@ public:
} else { } else {
gradients[i] = -weights_[i]; gradients[i] = -weights_[i];
} }
hessians[i] = static_cast<float>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -166,23 +166,23 @@ public: ...@@ -166,23 +166,23 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i]; const double diff = score[i] - label_[i];
if (std::abs(diff) <= delta_) { if (std::abs(diff) <= delta_) {
gradients[i] = static_cast<float>(diff); gradients[i] = static_cast<score_t>(diff);
hessians[i] = 1.0f; hessians[i] = 1.0f;
} else { } else {
if (diff >= 0.0f) { if (diff >= 0.0f) {
gradients[i] = static_cast<float>(delta_); gradients[i] = static_cast<score_t>(delta_);
} else { } else {
gradients[i] = static_cast<float>(-delta_); gradients[i] = static_cast<score_t>(-delta_);
} }
hessians[i] = static_cast<float>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_)); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} }
} else { } else {
...@@ -191,15 +191,15 @@ public: ...@@ -191,15 +191,15 @@ public:
const double diff = score[i] - label_[i]; const double diff = score[i] - label_[i];
if (std::abs(diff) <= delta_) { if (std::abs(diff) <= delta_) {
gradients[i] = static_cast<float>(diff * weights_[i]); gradients[i] = static_cast<score_t>(diff * weights_[i]);
hessians[i] = weights_[i]; hessians[i] = weights_[i];
} else { } else {
if (diff >= 0.0f) { if (diff >= 0.0f) {
gradients[i] = static_cast<float>(delta_ * weights_[i]); gradients[i] = static_cast<score_t>(delta_ * weights_[i]);
} else { } else {
gradients[i] = static_cast<float>(-delta_ * weights_[i]); gradients[i] = static_cast<score_t>(-delta_ * weights_[i]);
} }
hessians[i] = static_cast<float>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -250,21 +250,21 @@ public: ...@@ -250,21 +250,21 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
const double x = score[i] - label_[i]; const double x = score[i] - label_[i];
gradients[i] = static_cast<float>(c_ * x / (std::fabs(x) + c_)); gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_));
hessians[i] = static_cast<float>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_))); hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)));
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
const double x = score[i] - label_[i]; const double x = score[i] - label_[i];
gradients[i] = static_cast<float>(c_ * x / (std::fabs(x) + c_) * weights_[i]); gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_) * weights_[i]);
hessians[i] = static_cast<float>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)) * weights_[i]); hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)) * weights_[i]);
} }
} }
} }
...@@ -314,19 +314,19 @@ public: ...@@ -314,19 +314,19 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
} }
void GetGradients(const double* score, float* gradients, void GetGradients(const double* score, score_t* gradients,
float* hessians) const override { score_t* hessians) const override {
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<float>(score[i] - label_[i]); gradients[i] = static_cast<score_t>(score[i] - label_[i]);
hessians[i] = static_cast<float>(score[i] + max_delta_step_); hessians[i] = static_cast<score_t>(score[i] + max_delta_step_);
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<float>((score[i] - label_[i]) * weights_[i]); gradients[i] = static_cast<score_t>((score[i] - label_[i]) * weights_[i]);
hessians[i] = static_cast<float>((score[i] + max_delta_step_) * weights_[i]); hessians[i] = static_cast<score_t>((score[i] + max_delta_step_) * weights_[i]);
} }
} }
} }
......
...@@ -269,24 +269,24 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -269,24 +269,24 @@ void GPUTreeLearner::AllocateGPUMemory() {
ordered_gradients_.reserve(allocated_num_data_); ordered_gradients_.reserve(allocated_num_data_);
ordered_hessians_.reserve(allocated_num_data_); ordered_hessians_.reserve(allocated_num_data_);
pinned_gradients_ = boost::compute::buffer(); // deallocate pinned_gradients_ = boost::compute::buffer(); // deallocate
pinned_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(float), pinned_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr, boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
ordered_gradients_.data()); ordered_gradients_.data());
ptr_pinned_gradients_ = queue_.enqueue_map_buffer(pinned_gradients_, boost::compute::command_queue::map_write_invalidate_region, ptr_pinned_gradients_ = queue_.enqueue_map_buffer(pinned_gradients_, boost::compute::command_queue::map_write_invalidate_region,
0, allocated_num_data_ * sizeof(float)); 0, allocated_num_data_ * sizeof(score_t));
pinned_hessians_ = boost::compute::buffer(); // deallocate pinned_hessians_ = boost::compute::buffer(); // deallocate
pinned_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(float), pinned_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr, boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
ordered_hessians_.data()); ordered_hessians_.data());
ptr_pinned_hessians_ = queue_.enqueue_map_buffer(pinned_hessians_, boost::compute::command_queue::map_write_invalidate_region, ptr_pinned_hessians_ = queue_.enqueue_map_buffer(pinned_hessians_, boost::compute::command_queue::map_write_invalidate_region,
0, allocated_num_data_ * sizeof(float)); 0, allocated_num_data_ * sizeof(score_t));
// allocate space for gradients and hessians on device // allocate space for gradients and hessians on device
// we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed // we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
device_gradients_ = boost::compute::buffer(); // deallocate device_gradients_ = boost::compute::buffer(); // deallocate
device_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(float), device_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
boost::compute::memory_object::read_only, nullptr); boost::compute::memory_object::read_only, nullptr);
device_hessians_ = boost::compute::buffer(); // deallocate device_hessians_ = boost::compute::buffer(); // deallocate
device_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(float), device_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
boost::compute::memory_object::read_only, nullptr); boost::compute::memory_object::read_only, nullptr);
// allocate feature mask, for disabling some feature-groups' histogram calculation // allocate feature mask, for disabling some feature-groups' histogram calculation
feature_masks_.resize(num_dense_feature4_ * dword_features_); feature_masks_.resize(num_dense_feature4_ * dword_features_);
...@@ -723,7 +723,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) { ...@@ -723,7 +723,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
SetupKernelArguments(); SetupKernelArguments();
} }
Tree* GPUTreeLearner::Train(const float* gradients, const float *hessians, bool is_constant_hessian) { Tree* GPUTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) {
// check if we need to recompile the GPU kernel (is_constant_hessian changed) // check if we need to recompile the GPU kernel (is_constant_hessian changed)
// this should rarely occur // this should rarely occur
if (is_constant_hessian != is_constant_hessian_) { if (is_constant_hessian != is_constant_hessian_) {
...@@ -753,11 +753,11 @@ void GPUTreeLearner::BeforeTrain() { ...@@ -753,11 +753,11 @@ void GPUTreeLearner::BeforeTrain() {
// We start copying as early as possible, instead of at ConstructHistogram(). // We start copying as early as possible, instead of at ConstructHistogram().
if (!use_bagging_ && num_dense_feature_groups_) { if (!use_bagging_ && num_dense_feature_groups_) {
if (!is_constant_hessian_) { if (!is_constant_hessian_) {
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data_ * sizeof(float), hessians_); hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data_ * sizeof(score_t), hessians_);
} }
else { else {
// setup hessian parameters only // setup hessian parameters only
float const_hessian = hessians_[0]; score_t const_hessian = hessians_[0];
for (int i = 0; i <= kMaxLogWorkgroupsPerFeature; ++i) { for (int i = 0; i <= kMaxLogWorkgroupsPerFeature; ++i) {
// hessian is passed as a parameter // hessian is passed as a parameter
histogram_kernels_[i].set_arg(6, const_hessian); histogram_kernels_[i].set_arg(6, const_hessian);
...@@ -765,7 +765,7 @@ void GPUTreeLearner::BeforeTrain() { ...@@ -765,7 +765,7 @@ void GPUTreeLearner::BeforeTrain() {
histogram_fulldata_kernels_[i].set_arg(6, const_hessian); histogram_fulldata_kernels_[i].set_arg(6, const_hessian);
} }
} }
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data_ * sizeof(float), gradients_); gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data_ * sizeof(score_t), gradients_);
} }
SerialTreeLearner::BeforeTrain(); SerialTreeLearner::BeforeTrain();
...@@ -787,11 +787,11 @@ void GPUTreeLearner::BeforeTrain() { ...@@ -787,11 +787,11 @@ void GPUTreeLearner::BeforeTrain() {
ordered_hessians_[i] = hessians_[indices[i]]; ordered_hessians_[i] = hessians_[indices[i]];
} }
// transfer hessian to GPU // transfer hessian to GPU
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, cnt * sizeof(float), ordered_hessians_.data()); hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, cnt * sizeof(score_t), ordered_hessians_.data());
} }
else { else {
// setup hessian parameters only // setup hessian parameters only
float const_hessian = hessians_[indices[0]]; score_t const_hessian = hessians_[indices[0]];
for (int i = 0; i <= kMaxLogWorkgroupsPerFeature; ++i) { for (int i = 0; i <= kMaxLogWorkgroupsPerFeature; ++i) {
// hessian is passed as a parameter // hessian is passed as a parameter
histogram_kernels_[i].set_arg(6, const_hessian); histogram_kernels_[i].set_arg(6, const_hessian);
...@@ -804,7 +804,7 @@ void GPUTreeLearner::BeforeTrain() { ...@@ -804,7 +804,7 @@ void GPUTreeLearner::BeforeTrain() {
ordered_gradients_[i] = gradients_[indices[i]]; ordered_gradients_[i] = gradients_[indices[i]];
} }
// transfer gradients to GPU // transfer gradients to GPU
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, cnt * sizeof(float), ordered_gradients_.data()); gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, cnt * sizeof(score_t), ordered_gradients_.data());
} }
} }
...@@ -842,7 +842,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri ...@@ -842,7 +842,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
ordered_hessians_[i - begin] = hessians_[indices[i]]; ordered_hessians_[i - begin] = hessians_[indices[i]];
} }
// copy ordered hessians to the GPU: // copy ordered hessians to the GPU:
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, (end - begin) * sizeof(float), ptr_pinned_hessians_); hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, (end - begin) * sizeof(score_t), ptr_pinned_hessians_);
} }
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -850,7 +850,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri ...@@ -850,7 +850,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
ordered_gradients_[i - begin] = gradients_[indices[i]]; ordered_gradients_[i - begin] = gradients_[indices[i]];
} }
// copy ordered gradients to the GPU: // copy ordered gradients to the GPU:
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, (end - begin) * sizeof(float), ptr_pinned_gradients_); gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, (end - begin) * sizeof(score_t), ptr_pinned_gradients_);
#if GPU_DEBUG >= 2 #if GPU_DEBUG >= 2
Log::Info("gradients/hessians/indiex copied to device with size %d", end - begin); Log::Info("gradients/hessians/indiex copied to device with size %d", end - begin);
...@@ -862,8 +862,8 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri ...@@ -862,8 +862,8 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
bool GPUTreeLearner::ConstructGPUHistogramsAsync( bool GPUTreeLearner::ConstructGPUHistogramsAsync(
const std::vector<int8_t>& is_feature_used, const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data, const data_size_t* data_indices, data_size_t num_data,
const float* gradients, const float* hessians, const score_t* gradients, const score_t* hessians,
float* ordered_gradients, float* ordered_hessians) { score_t* ordered_gradients, score_t* ordered_hessians) {
if (num_data <= 0) { if (num_data <= 0) {
return false; return false;
...@@ -884,10 +884,10 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync( ...@@ -884,10 +884,10 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]]; ordered_gradients[i] = gradients[data_indices[i]];
} }
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(float), ptr_pinned_gradients_); gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(score_t), ptr_pinned_gradients_);
} }
else { else {
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(float), gradients); gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(score_t), gradients);
} }
} }
// generate and copy ordered_hessians if hessians is not null // generate and copy ordered_hessians if hessians is not null
...@@ -897,10 +897,10 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync( ...@@ -897,10 +897,10 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
ordered_hessians[i] = hessians[data_indices[i]]; ordered_hessians[i] = hessians[data_indices[i]];
} }
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data * sizeof(float), ptr_pinned_hessians_); hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data * sizeof(score_t), ptr_pinned_hessians_);
} }
else { else {
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data * sizeof(float), hessians); hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, num_data * sizeof(score_t), hessians);
} }
} }
// converted indices in is_feature_used to feature-group indices // converted indices in is_feature_used to feature-group indices
......
...@@ -40,7 +40,7 @@ public: ...@@ -40,7 +40,7 @@ public:
~GPUTreeLearner(); ~GPUTreeLearner();
void Init(const Dataset* train_data, bool is_constant_hessian) override; void Init(const Dataset* train_data, bool is_constant_hessian) override;
void ResetTrainingData(const Dataset* train_data) override; void ResetTrainingData(const Dataset* train_data) override;
Tree* Train(const float* gradients, const float *hessians, bool is_constant_hessian) override; Tree* Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) override;
void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override { void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override {
SerialTreeLearner::SetBaggingData(used_indices, num_data); SerialTreeLearner::SetBaggingData(used_indices, num_data);
...@@ -77,8 +77,8 @@ private: ...@@ -77,8 +77,8 @@ private:
/*! \brief Single precision histogram entiry for GPU */ /*! \brief Single precision histogram entiry for GPU */
struct GPUHistogramBinEntry { struct GPUHistogramBinEntry {
float sum_gradients; score_t sum_gradients;
float sum_hessians; score_t sum_hessians;
uint32_t cnt; uint32_t cnt;
}; };
...@@ -146,8 +146,8 @@ private: ...@@ -146,8 +146,8 @@ private:
bool ConstructGPUHistogramsAsync( bool ConstructGPUHistogramsAsync(
const std::vector<int8_t>& is_feature_used, const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data, const data_size_t* data_indices, data_size_t num_data,
const float* gradients, const float* hessians, const score_t* gradients, const score_t* hessians,
float* ordered_gradients, float* ordered_hessians); score_t* ordered_gradients, score_t* ordered_hessians);
/*! brief Log2 of max number of workgroups per feature*/ /*! brief Log2 of max number of workgroups per feature*/
......
...@@ -44,7 +44,7 @@ public: ...@@ -44,7 +44,7 @@ public:
* \param gradients * \param gradients
* \param hessians * \param hessians
*/ */
void Init(const float* gradients, const float* hessians) { void Init(const score_t* gradients, const score_t* hessians) {
num_data_in_leaf_ = num_data_; num_data_in_leaf_ = num_data_;
leaf_index_ = 0; leaf_index_ = 0;
data_indices_ = nullptr; data_indices_ = nullptr;
...@@ -66,7 +66,7 @@ public: ...@@ -66,7 +66,7 @@ public:
* \param gradients * \param gradients
* \param hessians * \param hessians
*/ */
void Init(int leaf, const DataPartition* data_partition, const float* gradients, const float* hessians) { void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
leaf_index_ = leaf; leaf_index_ = leaf;
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_); data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
double tmp_sum_gradients = 0.0f; double tmp_sum_gradients = 0.0f;
......
...@@ -165,7 +165,7 @@ void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) { ...@@ -165,7 +165,7 @@ void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) {
histogram_pool_.ResetConfig(tree_config_); histogram_pool_.ResetConfig(tree_config_);
} }
Tree* SerialTreeLearner::Train(const float* gradients, const float *hessians, bool is_constant_hessian) { Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) {
gradients_ = gradients; gradients_ = gradients;
hessians_ = hessians; hessians_ = hessians;
is_constant_hessian_ = is_constant_hessian; is_constant_hessian_ = is_constant_hessian;
...@@ -222,7 +222,7 @@ Tree* SerialTreeLearner::Train(const float* gradients, const float *hessians, bo ...@@ -222,7 +222,7 @@ Tree* SerialTreeLearner::Train(const float* gradients, const float *hessians, bo
return tree.release(); return tree.release();
} }
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const float* gradients, const float *hessians) const { Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
auto tree = std::unique_ptr<Tree>(new Tree(*old_tree)); auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
CHECK(data_partition_->num_leaves() >= tree->num_leaves()); CHECK(data_partition_->num_leaves() >= tree->num_leaves());
OMP_INIT_EX(); OMP_INIT_EX();
......
...@@ -41,9 +41,9 @@ public: ...@@ -41,9 +41,9 @@ public:
void ResetConfig(const TreeConfig* tree_config) override; void ResetConfig(const TreeConfig* tree_config) override;
Tree* Train(const float* gradients, const float *hessians, bool is_constant_hessian) override; Tree* Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) override;
Tree* FitByExistingTree(const Tree* old_tree, const float* gradients, const float* hessians) const override; Tree* FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const override;
void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override { void SetBaggingData(const data_size_t* used_indices, data_size_t num_data) override {
data_partition_->SetUsedDataIndices(used_indices, num_data); data_partition_->SetUsedDataIndices(used_indices, num_data);
...@@ -111,9 +111,9 @@ protected: ...@@ -111,9 +111,9 @@ protected:
/*! \brief training data */ /*! \brief training data */
const Dataset* train_data_; const Dataset* train_data_;
/*! \brief gradients of current iteration */ /*! \brief gradients of current iteration */
const float* gradients_; const score_t* gradients_;
/*! \brief hessians of current iteration */ /*! \brief hessians of current iteration */
const float* hessians_; const score_t* hessians_;
/*! \brief training data partition on leaves */ /*! \brief training data partition on leaves */
std::unique_ptr<DataPartition> data_partition_; std::unique_ptr<DataPartition> data_partition_;
/*! \brief used for generate used features */ /*! \brief used for generate used features */
...@@ -137,14 +137,14 @@ protected: ...@@ -137,14 +137,14 @@ protected:
#ifdef USE_GPU #ifdef USE_GPU
/*! \brief gradients of current iteration, ordered for cache optimized, aligned to 4K page */ /*! \brief gradients of current iteration, ordered for cache optimized, aligned to 4K page */
std::vector<float, boost::alignment::aligned_allocator<float, 4096>> ordered_gradients_; std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized, aligned to 4K page */ /*! \brief hessians of current iteration, ordered for cache optimized, aligned to 4K page */
std::vector<float, boost::alignment::aligned_allocator<float, 4096>> ordered_hessians_; std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_hessians_;
#else #else
/*! \brief gradients of current iteration, ordered for cache optimized */ /*! \brief gradients of current iteration, ordered for cache optimized */
std::vector<float> ordered_gradients_; std::vector<score_t> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized */ /*! \brief hessians of current iteration, ordered for cache optimized */
std::vector<float> ordered_hessians_; std::vector<score_t> ordered_hessians_;
#endif #endif
/*! \brief Store ordered bin */ /*! \brief Store ordered bin */
......
...@@ -166,7 +166,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const ...@@ -166,7 +166,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::GlobalVoting(int leaf_idx, const
return; return;
} }
// get mean number on machines // get mean number on machines
float mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<float>(num_machines_); score_t mean_num_data = GetGlobalDataCountInLeaf(leaf_idx) / static_cast<score_t>(num_machines_);
std::vector<SplitInfo> feature_best_split(this->num_features_, SplitInfo()); std::vector<SplitInfo> feature_best_split(this->num_features_, SplitInfo());
for (auto & split : splits) { for (auto & split : splits) {
int fid = split.feature; int fid = split.feature;
......
...@@ -234,7 +234,6 @@ ...@@ -234,7 +234,6 @@
<ClInclude Include="..\src\objective\multiclass_objective.hpp" /> <ClInclude Include="..\src\objective\multiclass_objective.hpp" />
<ClInclude Include="..\src\treelearner\data_partition.hpp" /> <ClInclude Include="..\src\treelearner\data_partition.hpp" />
<ClInclude Include="..\src\treelearner\feature_histogram.hpp" /> <ClInclude Include="..\src\treelearner\feature_histogram.hpp" />
<ClInclude Include="..\src\treelearner\gpu_tree_learner.h" />
<ClInclude Include="..\src\treelearner\leaf_splits.hpp" /> <ClInclude Include="..\src\treelearner\leaf_splits.hpp" />
<ClInclude Include="..\src\treelearner\parallel_tree_learner.h" /> <ClInclude Include="..\src\treelearner\parallel_tree_learner.h" />
<ClInclude Include="..\src\treelearner\serial_tree_learner.h" /> <ClInclude Include="..\src\treelearner\serial_tree_learner.h" />
...@@ -262,7 +261,6 @@ ...@@ -262,7 +261,6 @@
<ClCompile Include="..\src\main.cpp" /> <ClCompile Include="..\src\main.cpp" />
<ClCompile Include="..\src\treelearner\data_parallel_tree_learner.cpp" /> <ClCompile Include="..\src\treelearner\data_parallel_tree_learner.cpp" />
<ClCompile Include="..\src\treelearner\feature_parallel_tree_learner.cpp" /> <ClCompile Include="..\src\treelearner\feature_parallel_tree_learner.cpp" />
<ClCompile Include="..\src\treelearner\gpu_tree_learner.cpp" />
<ClCompile Include="..\src\treelearner\serial_tree_learner.cpp" /> <ClCompile Include="..\src\treelearner\serial_tree_learner.cpp" />
<ClCompile Include="..\src\treelearner\tree_learner.cpp" /> <ClCompile Include="..\src\treelearner\tree_learner.cpp" />
<ClCompile Include="..\src\treelearner\voting_parallel_tree_learner.cpp" /> <ClCompile Include="..\src\treelearner\voting_parallel_tree_learner.cpp" />
......
...@@ -183,9 +183,6 @@ ...@@ -183,9 +183,6 @@
<ClInclude Include="..\src\metric\map_metric.hpp"> <ClInclude Include="..\src\metric\map_metric.hpp">
<Filter>src\metric</Filter> <Filter>src\metric</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\treelearner\gpu_tree_learner.h">
<Filter>src\treelearner</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\src\application\application.cpp"> <ClCompile Include="..\src\application\application.cpp">
...@@ -260,8 +257,5 @@ ...@@ -260,8 +257,5 @@
<ClCompile Include="..\src\treelearner\voting_parallel_tree_learner.cpp"> <ClCompile Include="..\src\treelearner\voting_parallel_tree_learner.cpp">
<Filter>src\treelearner</Filter> <Filter>src\treelearner</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\treelearner\gpu_tree_learner.cpp">
<Filter>src\treelearner</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment