/*!
 * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
 */

#ifdef USE_CUDA

#include <LightGBM/cuda/cuda_algorithms.hpp>

#include "cuda_binary_metric.hpp"
#include "cuda_pointwise_metric.hpp"
#include "cuda_regression_metric.hpp"

namespace LightGBM {

/*!
 * \brief Evaluate a pointwise metric for every data point and write one partial sum per thread block.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per data point. The block-wide reduction is
 * called with NUM_DATA_PER_EVAL_THREAD as its length, so the kernel is expected to be launched
 * with blockDim.x == NUM_DATA_PER_EVAL_THREAD.
 * Shared memory: a static scratch buffer of 32 doubles handed to ShuffleReduceSum.
 *
 * \tparam CUDA_METRIC type providing the static device function MetricOnPointCUDA(label, score)
 * \tparam USE_WEIGHTS if true, each point metric is multiplied by its weight and the weights are summed too
 * \param num_data number of data points; threads whose index is >= num_data contribute 0
 * \param labels per-point labels, read at [index]
 * \param weights per-point weights, only dereferenced when USE_WEIGHTS is true
 * \param scores per-point scores, read at [index]
 * \param reduce_block_buffer output; element [blockIdx.x] receives the block's metric sum and, when
 *        USE_WEIGHTS is true, element [gridDim.x + blockIdx.x] receives the block's weight sum, so in
 *        the weighted case it must hold at least 2 * gridDim.x doubles
 */
template <typename CUDA_METRIC, bool USE_WEIGHTS>
__global__ void EvalKernel(const data_size_t num_data, const label_t* labels, const label_t* weights,
                           const double* scores, double* reduce_block_buffer) {
  __shared__ double shared_mem_buffer[32];
  const data_size_t index = static_cast<data_size_t>(threadIdx.x + blockIdx.x * blockDim.x);
  const bool in_range = index < num_data;

  // Out-of-range threads keep the neutral value 0.0 but still take part in the reduction below.
  double point_metric = 0.0;
  if (in_range) {
    point_metric = USE_WEIGHTS ?
      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]) * weights[index] :
      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]);
  }
  const double block_sum_point_metric = ShuffleReduceSum<double>(point_metric, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
  if (threadIdx.x == 0) {
    reduce_block_buffer[blockIdx.x] = block_sum_point_metric;
  }

  if (USE_WEIGHTS) {
    // Only the load is guarded by the bounds check. The reduction itself is a block-wide
    // collective and must be executed by every thread of the block; calling it inside the
    // bounds check would leave the tail block with only part of its threads participating.
    const double weight = in_range ? static_cast<double>(weights[index]) : 0.0;
    // The same scratch buffer is reused for the second reduction. ShuffleReduceSum is defined
    // elsewhere; presumably some threads may still be reading its result from the buffer when
    // it returns, so synchronize before the buffer is overwritten. USE_WEIGHTS is a template
    // constant, hence this barrier is reached uniformly by the whole block.
    __syncthreads();
    const double block_sum_weight = ShuffleReduceSum<double>(weight, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
    if (threadIdx.x == 0) {
      reduce_block_buffer[blockIdx.x + gridDim.x] = block_sum_weight;
    }
  }
}

/*!
 * \brief Launch EvalKernel over all data points and reduce its per-block partial sums to host scalars.
 *
 * One thread is launched per data point, in blocks of NUM_DATA_PER_EVAL_THREAD threads. The kernel
 * writes per-block sums into reduce_block_buffer_, which are then reduced on the device into
 * reduce_block_buffer_inner_ and copied back to the host.
 *
 * \param score per-point scores; forwarded unchanged to the kernel, so it must be readable from device code
 * \param sum_loss host output: sum of the point metrics (each multiplied by its weight when weights are present)
 * \param sum_weight host output: sum of the weights when cuda_weights_ is non-null, otherwise num_data_
 */
template <typename HOST_METRIC, typename CUDA_METRIC>
void CUDAPointwiseMetricInterface<HOST_METRIC, CUDA_METRIC>::LaunchEvalKernel(const double* score, double* sum_loss, double* sum_weight) const {
  // With no data the block count below would be 0, and a kernel launch with an empty grid is an
  // invalid configuration; the copies afterwards would then return whatever the buffers held.
  if (this->num_data_ <= 0) {
    *sum_loss = 0.0;
    *sum_weight = 0.0;
    return;
  }

  // Ceil-divide so that a partially filled tail block is still launched.
  const int num_blocks = (this->num_data_ + NUM_DATA_PER_EVAL_THREAD - 1) / NUM_DATA_PER_EVAL_THREAD;
  const bool has_weights = this->cuda_weights_ != nullptr;
  double* block_sums = reduce_block_buffer_.RawData();
  double* reduced = reduce_block_buffer_inner_.RawData();

  if (has_weights) {
    EvalKernel<CUDA_METRIC, true><<<num_blocks, NUM_DATA_PER_EVAL_THREAD>>>(
      this->num_data_, this->cuda_labels_, this->cuda_weights_, score, block_sums);
  } else {
    // cuda_weights_ is nullptr here; the USE_WEIGHTS == false kernel never dereferences it.
    EvalKernel<CUDA_METRIC, false><<<num_blocks, NUM_DATA_PER_EVAL_THREAD>>>(
      this->num_data_, this->cuda_labels_, this->cuda_weights_, score, block_sums);
  }

  // The first num_blocks entries hold the per-block metric sums.
  ShuffleReduceSumGlobal<double, double>(block_sums, num_blocks, reduced);
  CopyFromCUDADeviceToHost<double>(sum_loss, reduced, 1, __FILE__, __LINE__);

  // Unweighted default: every data point counts with weight 1.
  *sum_weight = static_cast<double>(this->num_data_);
  if (has_weights) {
    // The weighted kernel stores the per-block weight sums right after the metric sums
    // (offset gridDim.x == num_blocks). The inner buffer is reused for this second reduction.
    ShuffleReduceSumGlobal<double, double>(block_sums + num_blocks, num_blocks, reduced);
    CopyFromCUDADeviceToHost<double>(sum_weight, reduced, 1, __FILE__, __LINE__);
  }
}

63
64
65
// Explicit instantiations of LaunchEvalKernel for each (host metric, CUDA metric) pair listed below.
// The member template is defined in this .cu file, so these lines are what emit its code for the
// RMSE, L2 and binary log-loss metrics.
template void CUDAPointwiseMetricInterface<RMSEMetric, CUDARMSEMetric>::LaunchEvalKernel(const double* score, double* sum_loss, double* sum_weight) const;
template void CUDAPointwiseMetricInterface<L2Metric, CUDAL2Metric>::LaunchEvalKernel(const double* score, double* sum_loss, double* sum_weight) const;
template void CUDAPointwiseMetricInterface<BinaryLoglossMetric, CUDABinaryLoglossMetric>::LaunchEvalKernel(const double* score, double* sum_loss, double* sum_weight) const;

}  // namespace LightGBM

#endif  // USE_CUDA