/*! * Copyright (c) 2020 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. */ #ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_ #define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_ #include #include #include #include #include #include namespace LightGBM { template class MultiValSparseBin : public MultiValBin { public: explicit MultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row) : num_data_(num_data), num_bin_(num_bin), estimate_element_per_row_(estimate_element_per_row) { row_ptr_.resize(num_data_ + 1, 0); INDEX_T estimate_num_data = static_cast(estimate_element_per_row_ * 1.1 * num_data_); int num_threads = OMP_NUM_THREADS(); if (num_threads > 1) { t_data_.resize(num_threads - 1); for (size_t i = 0; i < t_data_.size(); ++i) { t_data_[i].resize(estimate_num_data / num_threads); } } t_size_.resize(num_threads, 0); data_.resize(estimate_num_data / num_threads); } ~MultiValSparseBin() {} data_size_t num_data() const override { return num_data_; } int num_bin() const override { return num_bin_; } double num_element_per_row() const override { return estimate_element_per_row_; } void PushOneRow(int tid, data_size_t idx, const std::vector& values) override { const int pre_alloc_size = 50; row_ptr_[idx + 1] = static_cast(values.size()); if (tid == 0) { if (t_size_[tid] + row_ptr_[idx + 1] > static_cast(data_.size())) { data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size); } for (auto val : values) { data_[t_size_[tid]++] = static_cast(val); } } else { if (t_size_[tid] + row_ptr_[idx + 1] > static_cast(t_data_[tid - 1].size())) { t_data_[tid - 1].resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size); } for (auto val : values) { t_data_[tid - 1][t_size_[tid]++] = static_cast(val); } } } void MergeData(const INDEX_T* sizes) { Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer); for (data_size_t i = 0; i < num_data_; ++i) { row_ptr_[i + 1] += row_ptr_[i]; } if (t_data_.size() > 0) { std::vector offsets(1 + t_data_.size()); offsets[0] = sizes[0]; for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) { offsets[tid + 1] = offsets[tid] + sizes[tid + 1]; } data_.resize(row_ptr_[num_data_]); #pragma omp parallel for schedule(static, 1) for (int tid = 0; tid < static_cast(t_data_.size()); ++tid) { std::copy_n(t_data_[tid].data(), sizes[tid + 1], data_.data() + offsets[tid]); } } else { data_.resize(row_ptr_[num_data_]); } } void FinishLoad() override { MergeData(t_size_.data()); t_size_.clear(); row_ptr_.shrink_to_fit(); data_.shrink_to_fit(); t_data_.clear(); t_data_.shrink_to_fit(); // update estimate_element_per_row_ by all data estimate_element_per_row_ = static_cast(row_ptr_[num_data_]) / num_data_; } bool IsSparse() override { return true; } #define ACC_GH(hist, i, g, h) \ const auto ti = static_cast(i) << 1; \ hist[ti] += g; \ hist[ti + 1] += h; template void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end, const score_t* gradients, const score_t* hessians, hist_t* out) const { data_size_t i = start; if (use_prefetch) { const data_size_t pf_offset = 32 / sizeof(VAL_T); const data_size_t pf_end = end - pf_offset; for (; i < pf_end; ++i) { const auto idx = use_indices ? data_indices[i] : i; const auto pf_idx = use_indices ? data_indices[i + pf_offset] : i + pf_offset; PREFETCH_T0(gradients + pf_idx); if (use_hessians) { PREFETCH_T0(hessians + pf_idx); } PREFETCH_T0(row_ptr_.data() + pf_idx); PREFETCH_T0(data_.data() + row_ptr_[pf_idx]); const auto j_start = RowPtr(idx); const auto j_end = RowPtr(idx + 1); for (auto j = j_start; j < j_end; ++j) { const VAL_T bin = data_[j]; if (use_hessians) { ACC_GH(out, bin, gradients[idx], hessians[idx]); } else { ACC_GH(out, bin, gradients[idx], 1.0f); } } } } for (; i < end; ++i) { const auto idx = use_indices ? data_indices[i] : i; const auto j_start = RowPtr(idx); const auto j_end = RowPtr(idx + 1); for (auto j = j_start; j < j_end; ++j) { const VAL_T bin = data_[j]; if (use_hessians) { ACC_GH(out, bin, gradients[idx], hessians[idx]); } else { ACC_GH(out, bin, gradients[idx], 1.0f); } } } } #undef ACC_GH void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, const score_t* gradients, const score_t* hessians, hist_t* out) const override { ConstructHistogramInner(data_indices, start, end, gradients, hessians, out); } void ConstructHistogram(data_size_t start, data_size_t end, const score_t* gradients, const score_t* hessians, hist_t* out) const override { ConstructHistogramInner(nullptr, start, end, gradients, hessians, out); } void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, const score_t* gradients, hist_t* out) const override { ConstructHistogramInner(data_indices, start, end, gradients, nullptr, out); } void ConstructHistogram(data_size_t start, data_size_t end, const score_t* gradients, hist_t* out) const override { ConstructHistogramInner(nullptr, start, end, gradients, nullptr, out); } MultiValBin* CreateLike(data_size_t num_data, int num_bin, int, double estimate_element_per_row) const override { return new MultiValSparseBin(num_data, num_bin, estimate_element_per_row); } void ReSize(data_size_t num_data, int num_bin, int, double estimate_element_per_row) override { num_data_ = num_data; num_bin_ = num_bin; estimate_element_per_row_ = estimate_element_per_row; INDEX_T estimate_num_data = static_cast(estimate_element_per_row_ * 1.1 * num_data_); size_t npart = 1 + t_data_.size(); INDEX_T avg_num_data = static_cast(estimate_num_data / npart); if (static_cast(data_.size()) < avg_num_data) { data_.resize(avg_num_data, 0); } for (size_t i = 0; i < t_data_.size(); ++i) { if (static_cast(t_data_[i].size()) < avg_num_data) { t_data_[i].resize(avg_num_data, 0); } } if (num_data_ + 1 > static_cast(row_ptr_.size())) { row_ptr_.resize(num_data_ + 1); } } template void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices, const std::vector& lower, const std::vector& upper, const std::vector& delta) { const auto other = reinterpret_cast*>(full_bin); if (SUBROW) { CHECK(num_data_ == num_used_indices); } int n_block = 1; data_size_t block_size = num_data_; Threading::BlockInfo(static_cast(t_data_.size() + 1), num_data_, 1024, &n_block, &block_size); std::vector sizes(t_data_.size() + 1, 0); const int pre_alloc_size = 50; #pragma omp parallel for schedule(static, 1) for (int tid = 0; tid < n_block; ++tid) { data_size_t start = tid * block_size; data_size_t end = std::min(num_data_, start + block_size); auto& buf = (tid == 0) ? data_ : t_data_[tid - 1]; INDEX_T size = 0; for (data_size_t i = start; i < end; ++i) { const auto j_start = SUBROW ? other->RowPtr(used_indices[i]) : other->RowPtr(i); const auto j_end = SUBROW ? other->RowPtr(used_indices[i] + 1) : other->RowPtr(i + 1); if (size + (j_end - j_start) > static_cast(buf.size())) { buf.resize(size + (j_end - j_start) * pre_alloc_size); } int k = 0; const auto pre_size = size; for (auto j = j_start; j < j_end; ++j) { const auto val = other->data_[j]; if (SUBCOL) { while (val >= upper[k]) { ++k; } if (val >= lower[k]) { buf[size++] = static_cast(val - delta[k]); } } else { buf[size++] = val; } } row_ptr_[i + 1] = size - pre_size; } sizes[tid] = size; } MergeData(sizes.data()); } void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override { CopyInner(full_bin, used_indices, num_used_indices, std::vector(), std::vector(), std::vector()); } void CopySubcol(const MultiValBin* full_bin, const std::vector&, const std::vector& lower, const std::vector& upper, const std::vector& delta) override { CopyInner(full_bin, nullptr, num_data_, lower, upper, delta); } void CopySubrowAndSubcol(const MultiValBin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices, const std::vector&, const std::vector& lower, const std::vector& upper, const std::vector& delta) override { CopyInner(full_bin, used_indices, num_used_indices, lower, upper, delta); } inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; } MultiValSparseBin* Clone() override; private: data_size_t num_data_; int num_bin_; double estimate_element_per_row_; std::vector> data_; std::vector> row_ptr_; std::vector>> t_data_; std::vector t_size_; MultiValSparseBin( const MultiValSparseBin& other) : num_data_(other.num_data_), num_bin_(other.num_bin_), estimate_element_per_row_(other.estimate_element_per_row_), data_(other.data_), row_ptr_(other.row_ptr_) {} }; template MultiValSparseBin* MultiValSparseBin::Clone() { return new MultiValSparseBin(*this); } } // namespace LightGBM #endif // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_