"tests/vscode:/vscode.git/clone" did not exist on "4f47547c88cc72de4b8f04de8de4e2b79a9240e7"
cuda_kernel_launcher.h 2.53 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*!
 * Copyright (c) 2020 IBM Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
#define LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_

#ifdef USE_CUDA
#include <chrono>
#include "kernels/histogram_16_64_256.hu"  // kernel, acc_type, data_size_t, uchar, score_t

namespace LightGBM {

/*!
 * \brief Plain bundle of values describing one unit of CUDA histogram work.
 *
 * The members fall into four groups: a device id, values whose names and types
 * mirror the parameter list of cuda_histogram(), CUDA event handles together
 * with a timing slot, and the host-side fields used when copying a histogram.
 *
 * The struct declares no constructor and no default member initializers, so a
 * default-initialized instance holds indeterminate values until every field is
 * assigned.
 * NOTE(review): this header does not show who allocates or frees the memory
 * and events behind the pointer members; presumably the code that fills the
 * struct owns them - confirm at the call site.
 */
struct ThreadData {
  // ---- device id ----
  int device_id;

  // ---- parameters for cuda_histogram ----
  // The first seven members share their names with cuda_histogram()'s leading
  // parameters; the remaining ones line up by type and order with its argN
  // parameters (presumably a one-to-one mapping - TODO confirm in the caller).
  int histogram_size;
  data_size_t leaf_num_data;
  data_size_t num_data;
  bool use_all_features;
  bool is_constant_hessian;
  int num_workgroups;
  cudaStream_t stream;
  uint8_t* device_features;
  uint8_t* device_feature_masks;
  data_size_t* device_data_indices;
  score_t* device_gradients;
  score_t* device_hessians;
  score_t hessians_const;
  char* device_subhistograms;
  volatile int* sync_counters;
  void* device_histogram_outputs;
  size_t exp_workgroups_per_feature;

  // ---- cuda events ----
  cudaEvent_t* kernel_start;
  cudaEvent_t* kernel_wait_obj;
  // Pointer to a millisecond-resolution, double-valued duration.
  std::chrono::duration<double, std::milli>* kernel_input_wait_time;

  // ---- copy histogram ----
  size_t output_size;
  char* host_histogram_output;
  cudaEvent_t* histograms_wait_obj;
};


/*!
 * \brief Declaration of the histogram entry point; the definition is not part
 *        of this header.
 *
 * NOTE(review): presumably this launches one of the kernels declared in
 * kernels/histogram_16_64_256.hu on the given stream - confirm against the
 * definition.
 *
 * The leading seven parameters carry the same names as the corresponding
 * ThreadData members. The argN parameters are not documented here; by type and
 * position they appear to correspond to ThreadData's device_features,
 * device_feature_masks, device_data_indices, device_gradients,
 * device_hessians, hessians_const, device_subhistograms, sync_counters and
 * device_histogram_outputs - TODO confirm. The meaning of the two data_size_t
 * values arg2 and arg4 cannot be determined from this header.
 */
void cuda_histogram(
    // configuration values
    int histogram_size,
    data_size_t leaf_num_data,
    data_size_t num_data,
    bool use_all_features,
    bool is_constant_hessian,
    int num_workgroups,
    cudaStream_t stream,
    // positional buffer / scalar arguments (see NOTE above)
    uint8_t* arg0,
    uint8_t* arg1,
    data_size_t arg2,
    data_size_t* arg3,
    data_size_t arg4,
    score_t* arg5,
    score_t* arg6,
    score_t arg6_const,
    char* arg7,
    volatile int* arg8,
    void* arg9,
    size_t exp_workgroups_per_feature);

}  // namespace LightGBM

#endif  // USE_CUDA
#endif  // LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_