Unverified Commit 631e0a2a authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[ci] prevent trailing whitespace, ensure files end with newline (#6373)

parent 6a1ec444
......@@ -29,5 +29,5 @@ Run the following command in this folder:
Data Format
-----------
To learn more about the query format used in this example, check out the
To learn more about the query format used in this example, check out the
[query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data).
......@@ -12,10 +12,10 @@ boosting_type = gbdt
objective = lambdarank
# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# auc
# binary_logloss , default metric for binary
# binary_error
metric = ndcg
......@@ -32,7 +32,7 @@ is_training_metric = true
# column in data to use as label
label_column = 0
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255
# training data
......@@ -44,7 +44,7 @@ data = rank.train
# validation data, support multi validation data, separated by ','
# if existing weight file, should name to "rank.test.weight"
# if existing query file, should name to "rank.test.query"
# alias: valid, test, test_data,
# alias: valid, test, test_data,
valid_data = rank.test
# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
......@@ -64,10 +64,10 @@ num_leaves = 31
# alias: tree
tree_learner = serial
# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu.
# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu.
# num_threads = 8
# feature sub-sample, will random select 80% feature to train on each iteration
# feature sub-sample, will random select 80% feature to train on each iteration
# alias: sub_feature
feature_fraction = 1.0
......
......@@ -13,10 +13,10 @@ boosting_type = gbdt
objective = multiclass
# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# auc
# binary_logloss , default metric for binary
# binary_error
# multi_logloss
......@@ -44,7 +44,7 @@ is_training_metric = true
# column in data to use as label
label_column = 0
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255
# training data
......
......@@ -12,10 +12,10 @@ boosting_type = gbdt
objective = binary
# eval metrics, support multi metric, delimite by ',' , support following metrics
# l1
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# auc
# binary_logloss , default metric for binary
# binary_error
metric = binary_logloss,auc
......@@ -29,7 +29,7 @@ is_training_metric = true
# column in data to use as label
label_column = 0
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255
# training data
......@@ -39,7 +39,7 @@ data = binary.train
# validation data, support multi validation data, separated by ','
# if existing weight file, should name to "binary.test.weight"
# alias: valid, test, test_data,
# alias: valid, test, test_data,
valid_data = binary.test
# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
......@@ -62,7 +62,7 @@ tree_learner = feature
# number of threads for multi-threading. One thread will use each CPU. The default is the CPU count.
# num_threads = 8
# feature sub-sample, will random select 80% feature to train on each iteration
# feature sub-sample, will random select 80% feature to train on each iteration
# alias: sub_feature
feature_fraction = 0.8
......
......@@ -23,11 +23,11 @@ Examples include:
- [simple_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py)
- Construct Dataset
- Basic train and predict
- Eval during training
- Eval during training
- Early stopping
- Save model to file
- [sklearn_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/sklearn_example.py)
- Create data for learning with sklearn interface
- Create data for learning with sklearn interface
- Basic train and predict with sklearn interface
- Feature importances with sklearn interface
- Self-defined eval metric with sklearn interface
......
......@@ -12,10 +12,10 @@ boosting_type = gbdt
objective = regression
# eval metrics, support multi metric, delimite by ',' , support following metrics
# l1
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# auc
# binary_logloss , default metric for binary
# binary_error
metric = l2
......@@ -29,7 +29,7 @@ is_training_metric = true
# column in data to use as label
label_column = 0
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255
# forced bin thresholds
......@@ -42,7 +42,7 @@ data = regression.train
# validation data, support multi validation data, separated by ','
# if exsting weight file, should name to "regression.test.weight"
# alias: valid, test, test_data,
# alias: valid, test, test_data,
valid_data = regression.test
# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
......@@ -62,10 +62,10 @@ num_leaves = 31
# alias: tree
tree_learner = serial
# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu.
# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu.
# num_threads = 8
# feature sub-sample, will random select 80% feature to train on each iteration
# feature sub-sample, will random select 80% feature to train on each iteration
# alias: sub_feature
feature_fraction = 0.9
......
......@@ -29,5 +29,5 @@ Run the following command in this folder:
Data Format
-----------
To learn more about the query format used in this example, check out the
To learn more about the query format used in this example, check out the
[query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data).
......@@ -12,10 +12,10 @@ boosting_type = gbdt
objective = rank_xendcg
# eval metrics, support multi metric, delimite by ',' , support following metrics
# l1
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# auc
# binary_logloss , default metric for binary
# binary_error
metric = ndcg
......@@ -32,7 +32,7 @@ is_training_metric = true
# column in data to use as label
label_column = 0
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy.
max_bin = 255
# training data
......@@ -44,7 +44,7 @@ data = rank.train
# validation data, support multi validation data, separated by ','
# if existing weight file, should name to "rank.test.weight"
# if existing query file, should name to "rank.test.query"
# alias: valid, test, test_data,
# alias: valid, test, test_data,
valid_data = rank.test
# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
......@@ -68,7 +68,7 @@ tree_learner = serial
num_threads = 1
objective_seed = 1025
# feature sub-sample, will random select 80% feature to train on each iteration
# feature sub-sample, will random select 80% feature to train on each iteration
# alias: sub_feature
feature_fraction = 1.0
......
......@@ -144,7 +144,7 @@ class BinMapper {
/*!
* \brief Maximum categorical value
* \return Maximum categorical value for categorical features, 0 for numerical features
* \return Maximum categorical value for categorical features, 0 for numerical features
*/
inline int MaxCatValue() const {
if (bin_2_categorical_.size() == 0) {
......
......@@ -128,7 +128,7 @@ class Network {
const ReduceFunction& reducer);
/*!
* \brief Performing all_gather by using Bruck algorithm.
* \brief Performing all_gather by using Bruck algorithm.
Communication times is O(log(n)), and communication cost is O(send_size * number_machine)
* It can be used when all nodes have same input size.
* \param input Input data
......@@ -138,7 +138,7 @@ class Network {
static void Allgather(char* input, comm_size_t send_size, char* output);
/*!
* \brief Performing all_gather by using Bruck algorithm.
* \brief Performing all_gather by using Bruck algorithm.
Communication times is O(log(n)), and communication cost is O(all_size)
* It can be used when nodes have different input size.
* \param input Input data
......@@ -150,7 +150,7 @@ class Network {
static void Allgather(char* input, const comm_size_t* block_start, const comm_size_t* block_len, char* output, comm_size_t all_size);
/*!
* \brief Perform reduce scatter by using recursive halving algorithm.
* \brief Perform reduce scatter by using recursive halving algorithm.
Communication times is O(log(n)), and communication cost is O(input_size)
* \param input Input data
* \param input_size The size of input data
......
......@@ -1232,7 +1232,7 @@ struct __TToStringHelper<T, true, true> {
* Converts an array to a string with with values separated by the space character.
* This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality
* and is locale-independent.
*
*
* \note If ``high_precision_output`` is set to true,
* floating point values are output with more digits of precision.
*/
......
PMML Generator
PMML Generator
==============
The old Python convert script is removed due to it cannot support the new format of categorical features.
......
......@@ -107,7 +107,7 @@ class SingleRowPredictorInner {
/*!
* \brief Object to store resources meant for single-row Fast Predict methods.
*
*
* For legacy reasons this is called `FastConfig` in the public C API.
*
* Meant to be used by the *Fast* predict methods only.
......
......@@ -25,36 +25,36 @@ typedef unsigned char uchar;
template<typename T>
__device__ double as_double(const T t) {
static_assert(sizeof(T) == sizeof(double), "size mismatch");
double d;
memcpy(&d, &t, sizeof(T));
double d;
memcpy(&d, &t, sizeof(T));
return d;
}
template<typename T>
__device__ unsigned long long as_ulong_ulong(const T t) {
static_assert(sizeof(T) == sizeof(unsigned long long), "size mismatch");
unsigned long long u;
memcpy(&u, &t, sizeof(T));
unsigned long long u;
memcpy(&u, &t, sizeof(T));
return u;
}
template<typename T>
__device__ float as_float(const T t) {
static_assert(sizeof(T) == sizeof(float), "size mismatch");
float f;
memcpy(&f, &t, sizeof(T));
float f;
memcpy(&f, &t, sizeof(T));
return f;
}
template<typename T>
__device__ unsigned int as_uint(const T t) {
static_assert(sizeof(T) == sizeof(unsigned int), "size_mismatch");
unsigned int u;
memcpy(&u, &t, sizeof(T));
unsigned int u;
memcpy(&u, &t, sizeof(T));
return u;
}
template<typename T>
__device__ uchar4 as_uchar4(const T t) {
static_assert(sizeof(T) == sizeof(uchar4), "size mismatch");
uchar4 u;
memcpy(&u, &t, sizeof(T));
uchar4 u;
memcpy(&u, &t, sizeof(T));
return u;
}
......@@ -158,4 +158,3 @@ DECLARE(histogram256);
} // namespace LightGBM
#endif // LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_
......@@ -38,7 +38,7 @@ class LeafSplits {
}
/*!
* \brief Init split on current leaf on partial data.
* \brief Init split on current leaf on partial data.
* \param leaf Index of current leaf
* \param data_partition current data partition
* \param sum_gradients
......@@ -54,7 +54,7 @@ class LeafSplits {
}
/*!
* \brief Init split on current leaf on partial data.
* \brief Init split on current leaf on partial data.
* \param leaf Index of current leaf
* \param data_partition current data partition
* \param sum_gradients
......
......@@ -73,12 +73,12 @@ typedef uint acc_int_type;
// local memory size in bytes
#define LOCAL_MEM_SIZE (DWORD_FEATURES * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS)
// unroll the atomic operation for a few times. Takes more code space,
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1
// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
......@@ -161,7 +161,7 @@ R""()
// this function will be called by histogram16
// we have one sub-histogram of one feature in registers, and need to read others
void within_kernel_reduction16x8(uchar8 feature_mask,
__global const acc_type* restrict feature4_sub_hist,
__global const acc_type* restrict feature4_sub_hist,
const uint skip_id,
acc_type stat_val,
const ushort num_sub_hist,
......@@ -173,7 +173,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask,
uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1; // hessian or gradient
ushort bin_id = ltid >> (LOG2_DWORD_FEATURES + 1); // range 0 - 16
ushort i;
#if POWER_FEATURE_WORKGROUPS != 0
#if POWER_FEATURE_WORKGROUPS != 0
// if there is only 1 work group, no need to do the reduction
// add all sub-histograms for 4 features
__global const acc_type* restrict p = feature4_sub_hist + ltid;
......@@ -185,7 +185,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask,
// skip the counters we already have
p += 2 * DWORD_FEATURES * NUM_BINS;
for (i = i + 1; i < num_sub_hist; ++i) {
stat_val += *p;
stat_val += *p;
p += NUM_BINS * DWORD_FEATURES * 2;
}
#endif
......@@ -208,12 +208,12 @@ R""()
__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram16(__global const uchar4* restrict feature_data_base,
__kernel void histogram16(__global const uchar4* restrict feature_data_base,
__constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
__constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
......@@ -223,18 +223,18 @@ __kernel void histogram16(__global const uchar4* restrict feature_data_base,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram16(__global const uchar4* feature_data_base,
__kernel void histogram16(__global const uchar4* feature_data_base,
__constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
__global const score_t* ordered_hessians,
#else
const score_t const_hessian,
#endif
__global char* restrict output_buf,
__global char* restrict output_buf,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#endif
......@@ -260,38 +260,38 @@ __kernel void histogram16(__global const uchar4* feature_data_base,
// there are 8 banks (sub-histograms) used by 256 threads total 8 KB
/* memory layout of gh_hist:
-----------------------------------------------------------------------------------------------
bk0_g_f0_bin0 bk0_g_f1_bin0 bk0_g_f2_bin0 bk0_g_f3_bin0 bk0_g_f4_bin0 bk0_g_f5_bin0 bk0_g_f6_bin0 bk0_g_f7_bin0
bk0_g_f0_bin0 bk0_g_f1_bin0 bk0_g_f2_bin0 bk0_g_f3_bin0 bk0_g_f4_bin0 bk0_g_f5_bin0 bk0_g_f6_bin0 bk0_g_f7_bin0
bk0_h_f0_bin0 bk0_h_f1_bin0 bk0_h_f2_bin0 bk0_h_f3_bin0 bk0_h_f4_bin0 bk0_h_f5_bin0 bk0_h_f6_bin0 bk0_h_f7_bin0
bk1_g_f0_bin0 bk1_g_f1_bin0 bk1_g_f2_bin0 bk1_g_f3_bin0 bk1_g_f4_bin0 bk1_g_f5_bin0 bk1_g_f6_bin0 bk1_g_f7_bin0
bk1_g_f0_bin0 bk1_g_f1_bin0 bk1_g_f2_bin0 bk1_g_f3_bin0 bk1_g_f4_bin0 bk1_g_f5_bin0 bk1_g_f6_bin0 bk1_g_f7_bin0
bk1_h_f0_bin0 bk1_h_f1_bin0 bk1_h_f2_bin0 bk1_h_f3_bin0 bk1_h_f4_bin0 bk1_h_f5_bin0 bk1_h_f6_bin0 bk1_h_f7_bin0
bk2_g_f0_bin0 bk2_g_f1_bin0 bk2_g_f2_bin0 bk2_g_f3_bin0 bk2_g_f4_bin0 bk2_g_f5_bin0 bk2_g_f6_bin0 bk2_g_f7_bin0
bk2_g_f0_bin0 bk2_g_f1_bin0 bk2_g_f2_bin0 bk2_g_f3_bin0 bk2_g_f4_bin0 bk2_g_f5_bin0 bk2_g_f6_bin0 bk2_g_f7_bin0
bk2_h_f0_bin0 bk2_h_f1_bin0 bk2_h_f2_bin0 bk2_h_f3_bin0 bk2_h_f4_bin0 bk2_h_f5_bin0 bk2_h_f6_bin0 bk2_h_f7_bin0
bk3_g_f0_bin0 bk3_g_f1_bin0 bk3_g_f2_bin0 bk3_g_f3_bin0 bk3_g_f4_bin0 bk3_g_f5_bin0 bk3_g_f6_bin0 bk3_g_f7_bin0
bk3_g_f0_bin0 bk3_g_f1_bin0 bk3_g_f2_bin0 bk3_g_f3_bin0 bk3_g_f4_bin0 bk3_g_f5_bin0 bk3_g_f6_bin0 bk3_g_f7_bin0
bk3_h_f0_bin0 bk3_h_f1_bin0 bk3_h_f2_bin0 bk3_h_f3_bin0 bk3_h_f4_bin0 bk3_h_f5_bin0 bk3_h_f6_bin0 bk3_h_f7_bin0
bk4_g_f0_bin0 bk4_g_f1_bin0 bk4_g_f2_bin0 bk4_g_f3_bin0 bk4_g_f4_bin0 bk4_g_f5_bin0 bk4_g_f6_bin0 bk4_g_f7_bin0
bk4_g_f0_bin0 bk4_g_f1_bin0 bk4_g_f2_bin0 bk4_g_f3_bin0 bk4_g_f4_bin0 bk4_g_f5_bin0 bk4_g_f6_bin0 bk4_g_f7_bin0
bk4_h_f0_bin0 bk4_h_f1_bin0 bk4_h_f2_bin0 bk4_h_f3_bin0 bk4_h_f4_bin0 bk4_h_f5_bin0 bk4_h_f6_bin0 bk4_h_f7_bin0
bk5_g_f0_bin0 bk5_g_f1_bin0 bk5_g_f2_bin0 bk5_g_f3_bin0 bk5_g_f4_bin0 bk5_g_f5_bin0 bk5_g_f6_bin0 bk5_g_f7_bin0
bk5_g_f0_bin0 bk5_g_f1_bin0 bk5_g_f2_bin0 bk5_g_f3_bin0 bk5_g_f4_bin0 bk5_g_f5_bin0 bk5_g_f6_bin0 bk5_g_f7_bin0
bk5_h_f0_bin0 bk5_h_f1_bin0 bk5_h_f2_bin0 bk5_h_f3_bin0 bk5_h_f4_bin0 bk5_h_f5_bin0 bk5_h_f6_bin0 bk5_h_f7_bin0
bk6_g_f0_bin0 bk6_g_f1_bin0 bk6_g_f2_bin0 bk6_g_f3_bin0 bk6_g_f4_bin0 bk6_g_f5_bin0 bk6_g_f6_bin0 bk6_g_f7_bin0
bk6_g_f0_bin0 bk6_g_f1_bin0 bk6_g_f2_bin0 bk6_g_f3_bin0 bk6_g_f4_bin0 bk6_g_f5_bin0 bk6_g_f6_bin0 bk6_g_f7_bin0
bk6_h_f0_bin0 bk6_h_f1_bin0 bk6_h_f2_bin0 bk6_h_f3_bin0 bk6_h_f4_bin0 bk6_h_f5_bin0 bk6_h_f6_bin0 bk6_h_f7_bin0
bk7_g_f0_bin0 bk7_g_f1_bin0 bk7_g_f2_bin0 bk7_g_f3_bin0 bk7_g_f4_bin0 bk7_g_f5_bin0 bk7_g_f6_bin0 bk7_g_f7_bin0
bk7_g_f0_bin0 bk7_g_f1_bin0 bk7_g_f2_bin0 bk7_g_f3_bin0 bk7_g_f4_bin0 bk7_g_f5_bin0 bk7_g_f6_bin0 bk7_g_f7_bin0
bk7_h_f0_bin0 bk7_h_f1_bin0 bk7_h_f2_bin0 bk7_h_f3_bin0 bk7_h_f4_bin0 bk7_h_f5_bin0 bk7_h_f6_bin0 bk7_h_f7_bin0
...
bk0_g_f0_bin16 bk0_g_f1_bin16 bk0_g_f2_bin16 bk0_g_f3_bin16 bk0_g_f4_bin16 bk0_g_f5_bin16 bk0_g_f6_bin16 bk0_g_f7_bin16
bk0_g_f0_bin16 bk0_g_f1_bin16 bk0_g_f2_bin16 bk0_g_f3_bin16 bk0_g_f4_bin16 bk0_g_f5_bin16 bk0_g_f6_bin16 bk0_g_f7_bin16
bk0_h_f0_bin16 bk0_h_f1_bin16 bk0_h_f2_bin16 bk0_h_f3_bin16 bk0_h_f4_bin16 bk0_h_f5_bin16 bk0_h_f6_bin16 bk0_h_f7_bin16
bk1_g_f0_bin16 bk1_g_f1_bin16 bk1_g_f2_bin16 bk1_g_f3_bin16 bk1_g_f4_bin16 bk1_g_f5_bin16 bk1_g_f6_bin16 bk1_g_f7_bin16
bk1_g_f0_bin16 bk1_g_f1_bin16 bk1_g_f2_bin16 bk1_g_f3_bin16 bk1_g_f4_bin16 bk1_g_f5_bin16 bk1_g_f6_bin16 bk1_g_f7_bin16
bk1_h_f0_bin16 bk1_h_f1_bin16 bk1_h_f2_bin16 bk1_h_f3_bin16 bk1_h_f4_bin16 bk1_h_f5_bin16 bk1_h_f6_bin16 bk1_h_f7_bin16
bk2_g_f0_bin16 bk2_g_f1_bin16 bk2_g_f2_bin16 bk2_g_f3_bin16 bk2_g_f4_bin16 bk2_g_f5_bin16 bk2_g_f6_bin16 bk2_g_f7_bin16
bk2_g_f0_bin16 bk2_g_f1_bin16 bk2_g_f2_bin16 bk2_g_f3_bin16 bk2_g_f4_bin16 bk2_g_f5_bin16 bk2_g_f6_bin16 bk2_g_f7_bin16
bk2_h_f0_bin16 bk2_h_f1_bin16 bk2_h_f2_bin16 bk2_h_f3_bin16 bk2_h_f4_bin16 bk2_h_f5_bin16 bk2_h_f6_bin16 bk2_h_f7_bin16
bk3_g_f0_bin16 bk3_g_f1_bin16 bk3_g_f2_bin16 bk3_g_f3_bin16 bk3_g_f4_bin16 bk3_g_f5_bin16 bk3_g_f6_bin16 bk3_g_f7_bin16
bk3_g_f0_bin16 bk3_g_f1_bin16 bk3_g_f2_bin16 bk3_g_f3_bin16 bk3_g_f4_bin16 bk3_g_f5_bin16 bk3_g_f6_bin16 bk3_g_f7_bin16
bk3_h_f0_bin16 bk3_h_f1_bin16 bk3_h_f2_bin16 bk3_h_f3_bin16 bk3_h_f4_bin16 bk3_h_f5_bin16 bk3_h_f6_bin16 bk3_h_f7_bin16
bk4_g_f0_bin16 bk4_g_f1_bin16 bk4_g_f2_bin16 bk4_g_f3_bin16 bk4_g_f4_bin16 bk4_g_f5_bin16 bk4_g_f6_bin16 bk4_g_f7_bin16
bk4_g_f0_bin16 bk4_g_f1_bin16 bk4_g_f2_bin16 bk4_g_f3_bin16 bk4_g_f4_bin16 bk4_g_f5_bin16 bk4_g_f6_bin16 bk4_g_f7_bin16
bk4_h_f0_bin16 bk4_h_f1_bin16 bk4_h_f2_bin16 bk4_h_f3_bin16 bk4_h_f4_bin16 bk4_h_f5_bin16 bk4_h_f6_bin16 bk4_h_f7_bin16
bk5_g_f0_bin16 bk5_g_f1_bin16 bk5_g_f2_bin16 bk5_g_f3_bin16 bk5_g_f4_bin16 bk5_g_f5_bin16 bk5_g_f6_bin16 bk5_g_f7_bin16
bk5_g_f0_bin16 bk5_g_f1_bin16 bk5_g_f2_bin16 bk5_g_f3_bin16 bk5_g_f4_bin16 bk5_g_f5_bin16 bk5_g_f6_bin16 bk5_g_f7_bin16
bk5_h_f0_bin16 bk5_h_f1_bin16 bk5_h_f2_bin16 bk5_h_f3_bin16 bk5_h_f4_bin16 bk5_h_f5_bin16 bk5_h_f6_bin16 bk5_h_f7_bin16
bk6_g_f0_bin16 bk6_g_f1_bin16 bk6_g_f2_bin16 bk6_g_f3_bin16 bk6_g_f4_bin16 bk6_g_f5_bin16 bk6_g_f6_bin16 bk6_g_f7_bin16
bk6_g_f0_bin16 bk6_g_f1_bin16 bk6_g_f2_bin16 bk6_g_f3_bin16 bk6_g_f4_bin16 bk6_g_f5_bin16 bk6_g_f6_bin16 bk6_g_f7_bin16
bk6_h_f0_bin16 bk6_h_f1_bin16 bk6_h_f2_bin16 bk6_h_f3_bin16 bk6_h_f4_bin16 bk6_h_f5_bin16 bk6_h_f6_bin16 bk6_h_f7_bin16
bk7_g_f0_bin16 bk7_g_f1_bin16 bk7_g_f2_bin16 bk7_g_f3_bin16 bk7_g_f4_bin16 bk7_g_f5_bin16 bk7_g_f6_bin16 bk7_g_f7_bin16
bk7_g_f0_bin16 bk7_g_f1_bin16 bk7_g_f2_bin16 bk7_g_f3_bin16 bk7_g_f4_bin16 bk7_g_f5_bin16 bk7_g_f6_bin16 bk7_g_f7_bin16
bk7_h_f0_bin16 bk7_h_f1_bin16 bk7_h_f2_bin16 bk7_h_f3_bin16 bk7_h_f4_bin16 bk7_h_f5_bin16 bk7_h_f6_bin16 bk7_h_f7_bin16
-----------------------------------------------------------------------------------------------
*/
......@@ -333,7 +333,7 @@ __kernel void histogram16(__global const uchar4* feature_data_base,
uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1;
// thread 0-15 write result to bank0, 16-31 to bank1, 32-47 to bank2, 48-63 to bank3, etc
ushort bank = (ltid >> (LOG2_DWORD_FEATURES + 1)) & BANK_MASK;
ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
// each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant)
// feature_size is the number of examples per feature
......@@ -615,12 +615,12 @@ R""()
)""
R""()
*/
#if ENABLE_ALL_FEATURES == 0
// restore feature_mask
feature_mask = feature_masks[group_feature];
#endif
// now reduce the 4 banks of subhistograms into 1
acc_type stat_val = 0.0f;
uint cnt_val = 0;
......@@ -644,7 +644,7 @@ R""()
}
}
#endif
// now thread 0 - 7 holds feature 0 - 7's gradient for bin 0 and counter bin 0
// now thread 8 - 15 holds feature 0 - 7's hessian for bin 0 and counter bin 1
// now thread 16- 23 holds feature 0 - 7's gradient for bin 1 and counter bin 2
......@@ -664,7 +664,7 @@ R""()
// thread 8 - 15 read counters stored by thread 0 - 7
// thread 24- 31 read counters stored by thread 8 - 15
// thread 40- 47 read counters stored by thread 16- 23, etc
stat_val = const_hessian *
stat_val = const_hessian *
cnt_hist[((ltid - DWORD_FEATURES) >> (LOG2_DWORD_FEATURES + 1)) * DWORD_FEATURES + (ltid & DWORD_FEATURES_MASK)];
}
else {
......@@ -688,12 +688,12 @@ R""()
h_f0_bin1 h_f1_bin1 h_f2_bin1 h_f3_bin1 h_f4_bin1 h_f5_bin1 h_f6_bin1 h_f7_bin1
...
...
g_f0_bin16 g_f1_bin16 g_f2_bin16 g_f3_bin16 g_f4_bin16 g_f5_bin16 g_f6_bin16 g_f7_bin16
h_f0_bin16 h_f1_bin16 h_f2_bin16 h_f3_bin16 h_f4_bin16 h_f5_bin16 h_f6_bin16 h_f7_bin16
g_f0_bin16 g_f1_bin16 g_f2_bin16 g_f3_bin16 g_f4_bin16 g_f5_bin16 g_f6_bin16 g_f7_bin16
h_f0_bin16 h_f1_bin16 h_f2_bin16 h_f3_bin16 h_f4_bin16 h_f5_bin16 h_f6_bin16 h_f7_bin16
c_f0_bin0 c_f1_bin0 c_f2_bin0 c_f3_bin0 c_f4_bin0 c_f5_bin0 c_f6_bin0 c_f7_bin0
c_f0_bin1 c_f1_bin1 c_f2_bin1 c_f3_bin1 c_f4_bin1 c_f5_bin1 c_f6_bin1 c_f7_bin1
...
c_f0_bin16 c_f1_bin16 c_f2_bin16 c_f3_bin16 c_f4_bin16 c_f5_bin16 c_f6_bin16 c_f7_bin16
c_f0_bin16 c_f1_bin16 c_f2_bin16 c_f3_bin16 c_f4_bin16 c_f5_bin16 c_f6_bin16 c_f7_bin16
*/
// if there is only one workgroup processing this feature4, don't even need to write
uint feature4_id = (group_id >> POWER_FEATURE_WORKGROUPS);
......@@ -704,7 +704,7 @@ R""()
output[0 * DWORD_FEATURES * NUM_BINS + ltid] = stat_val;
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
mem_fence(CLK_GLOBAL_MEM_FENCE);
// To avoid the cost of an extra reducing kernel, we have to deal with some
// To avoid the cost of an extra reducing kernel, we have to deal with some
// gray area in OpenCL. We want the last work group that process this feature to
// make the final reduction, and other threads will just quit.
// This requires that the results written by other workgroups available to the
......@@ -750,13 +750,13 @@ R""()
#endif
// locate our feature4's block in output memory
uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
__global acc_type const * restrict feature4_subhists =
__global acc_type const * restrict feature4_subhists =
(__global acc_type *)output_buf + output_offset * DWORD_FEATURES * 2 * NUM_BINS;
// skip reading the data already in local memory
uint skip_id = group_id ^ output_offset;
// locate output histogram location for this feature4
__global acc_type* restrict hist_buf = hist_buf_base + feature4_id * DWORD_FEATURES * 2 * NUM_BINS;
within_kernel_reduction16x8(feature_mask, feature4_subhists, skip_id, stat_val,
within_kernel_reduction16x8(feature_mask, feature4_subhists, skip_id, stat_val,
1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
}
}
......
......@@ -47,12 +47,12 @@ typedef uint acc_int_type;
#endif
#define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS)
// unroll the atomic operation for a few times. Takes more code space,
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1
// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
......@@ -137,7 +137,7 @@ R""()
// this function will be called by histogram256
// we have one sub-histogram of one feature in local memory, and need to read others
void within_kernel_reduction256x4(uchar4 feature_mask,
__global const acc_type* restrict feature4_sub_hist,
__global const acc_type* restrict feature4_sub_hist,
const uint skip_id,
const uint old_val_f0_cont_bin0,
const ushort num_sub_hist,
......@@ -314,12 +314,12 @@ R""()
*/
__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram256(__global const uchar4* restrict feature_data_base,
__kernel void histogram256(__global const uchar4* restrict feature_data_base,
__constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
__constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
......@@ -329,18 +329,18 @@ __kernel void histogram256(__global const uchar4* restrict feature_data_base,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram256(__global const uchar4* feature_data_base,
__kernel void histogram256(__global const uchar4* feature_data_base,
__constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
__global const score_t* ordered_hessians,
#else
const score_t const_hessian,
#endif
__global char* restrict output_buf,
__global char* restrict output_buf,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#endif
......@@ -363,20 +363,20 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
// gradient/hessian histograms
// assume this starts at 32 * 4 = 128-byte boundary
// total size: 2 * 4 * 256 * size_of(float) = 8 KB
// organization: each feature/grad/hessian is at a different bank,
// organization: each feature/grad/hessian is at a different bank,
// as independent of the feature value as possible
__local acc_type * gh_hist = (__local acc_type *)shared_array;
// counter histogram
// total size: 4 * 256 * size_of(uint) = 4 KB
#if CONST_HESSIAN == 1
__local uint * cnt_hist = (__local uint *)(gh_hist + 2 * 4 * NUM_BINS);
#endif
#endif
// thread 0, 1, 2, 3 compute histograms for gradients first
// thread 4, 5, 6, 7 compute histograms for Hessians first
// etc.
uchar is_hessian_first = (ltid >> 2) & 1;
ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
// each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant)
// feature_size is the number of examples per feature
......@@ -725,7 +725,7 @@ R""()
}
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
mem_fence(CLK_GLOBAL_MEM_FENCE);
// To avoid the cost of an extra reducing kernel, we have to deal with some
// To avoid the cost of an extra reducing kernel, we have to deal with some
// gray area in OpenCL. We want the last work group that process this feature to
// make the final reduction, and other threads will just quit.
// This requires that the results written by other workgroups available to the
......@@ -773,15 +773,15 @@ R""()
#endif
// locate our feature4's block in output memory
uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
__global acc_type const * restrict feature4_subhists =
__global acc_type const * restrict feature4_subhists =
(__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
// skip reading the data already in local memory
uint skip_id = group_id ^ output_offset;
// locate output histogram location for this feature4
__global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
within_kernel_reduction256x4(feature_mask, feature4_subhists, skip_id, old_val, 1 << POWER_FEATURE_WORKGROUPS,
within_kernel_reduction256x4(feature_mask, feature4_subhists, skip_id, old_val, 1 << POWER_FEATURE_WORKGROUPS,
hist_buf, (__local acc_type *)shared_array);
// if (ltid == 0)
// if (ltid == 0)
// printf("workgroup %d reduction done, %g %g %g %g %g %g %g %g\n", group_id, hist_buf[0], hist_buf[3*NUM_BINS], hist_buf[2*3*NUM_BINS], hist_buf[3*3*NUM_BINS], hist_buf[1], hist_buf[3*NUM_BINS+1], hist_buf[2*3*NUM_BINS+1], hist_buf[3*3*NUM_BINS+1]);
}
}
......
......@@ -65,12 +65,12 @@ typedef uint acc_int_type;
// local memory size in bytes
#define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS)
// unroll the atomic operation for a few times. Takes more code space,
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1
// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
......@@ -155,7 +155,7 @@ R""()
// this function will be called by histogram64
// we have one sub-histogram of one feature in registers, and need to read others
void within_kernel_reduction64x4(uchar4 feature_mask,
__global const acc_type* restrict feature4_sub_hist,
__global const acc_type* restrict feature4_sub_hist,
const uint skip_id,
acc_type g_val, acc_type h_val,
const ushort num_sub_hist,
......@@ -166,7 +166,7 @@ void within_kernel_reduction64x4(uchar4 feature_mask,
ushort feature_id = ltid & 3; // range 0 - 4
const ushort bin_id = ltid >> 2; // range 0 - 63W
ushort i;
#if POWER_FEATURE_WORKGROUPS != 0
#if POWER_FEATURE_WORKGROUPS != 0
// if there is only 1 work group, no need to do the reduction
// add all sub-histograms for 4 features
__global const acc_type* restrict p = feature4_sub_hist + ltid;
......@@ -212,12 +212,12 @@ R""()
*/
__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram64(__global const uchar4* restrict feature_data_base,
__kernel void histogram64(__global const uchar4* restrict feature_data_base,
__constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
__constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
const data_size_t num_data,
__constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
__constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
......@@ -227,18 +227,18 @@ __kernel void histogram64(__global const uchar4* restrict feature_data_base,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram64(__global const uchar4* feature_data_base,
__kernel void histogram64(__global const uchar4* feature_data_base,
__constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
const data_size_t feature_size,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
__global const data_size_t* data_indices,
const data_size_t num_data,
__global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
__global const score_t* ordered_hessians,
#else
const score_t const_hessian,
#endif
__global char* restrict output_buf,
__global char* restrict output_buf,
__global volatile int * sync_counters,
__global acc_type* restrict hist_buf_base) {
#endif
......@@ -313,7 +313,7 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
uchar is_hessian_first = (ltid >> 2) & 1;
// thread 0-7 write result to bank0, 8-15 to bank1, 16-23 to bank2, 24-31 to bank3
ushort bank = (ltid >> 3) & BANK_MASK;
ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
// each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant)
// feature_size is the number of examples per feature
......@@ -582,7 +582,7 @@ R""()
atomic_local_add_f(gh_hist + addr2, s0_stat2);
#endif
barrier(CLK_LOCAL_MEM_FENCE);
/* Makes MSVC happy with long string literal
)""
R""()
......@@ -591,7 +591,7 @@ R""()
// restore feature_mask
feature_mask = feature_masks[group_feature];
#endif
// now reduce the 4 banks of subhistograms into 1
/* memory layout of gh_hist:
-----------------------------------------------------------------------------------------------
......@@ -680,7 +680,7 @@ R""()
output[1 * 4 * NUM_BINS + ltid] = h_val;
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
mem_fence(CLK_GLOBAL_MEM_FENCE);
// To avoid the cost of an extra reducing kernel, we have to deal with some
// To avoid the cost of an extra reducing kernel, we have to deal with some
// gray area in OpenCL. We want the last work group that processes this feature to
// make the final reduction, and other threads will just quit.
// This requires that the results written by other workgroups be available to the
......@@ -726,13 +726,13 @@ R""()
#endif
// locate our feature4's block in output memory
uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
__global acc_type const * restrict feature4_subhists =
__global acc_type const * restrict feature4_subhists =
(__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
// skip reading the data already in local memory
uint skip_id = group_id ^ output_offset;
// locate output histogram location for this feature4
__global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val,
within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val,
1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
}
}
......
......@@ -6,7 +6,7 @@
*/
/**
* This wraps the StringArray.hpp class for SWIG usage,
* adding the basic C-style wrappers needed to make it
* adding the basic C-style wrappers needed to make it
* usable for the users of the low-level lightgbmJNI API.
*/
......@@ -23,7 +23,7 @@
/**
* @brief Creates a new StringArray and returns its handle.
*
*
* @param num_strings number of strings to store.
* @param string_size the maximum number of characters that can be stored in each string.
* @return StringArrayHandle or nullptr in case of allocation failure.
......@@ -38,7 +38,7 @@
/**
* @brief Free the StringArray object.
*
*
* @param handle StringArray handle.
*/
void StringArrayHandle_free(StringArrayHandle handle)
......@@ -49,7 +49,7 @@
/**
* @brief Return the raw pointer to the array of strings.
* Wrapped in Java into String[] automatically.
*
*
* @param handle StringArray handle.
* @return Raw pointer to the string array which `various.i` maps to String[].
*/
......@@ -60,7 +60,7 @@
/**
* For the end user to extract a specific string from the StringArray object.
*
*
* @param handle StringArray handle.
* @param index index of the string to retrieve from the array.
* @return raw pointer to string at index, or nullptr if out of bounds.
......@@ -72,7 +72,7 @@
/**
* @brief Replaces one string of the array at index with the new content.
*
*
* @param handle StringArray handle.
* @param index Index of the string to replace
* @param new_content The content to replace
......@@ -85,7 +85,7 @@
/**
* @brief Retrieve the number of strings in the StringArray.
*
*
* @param handle StringArray handle.
* @return number of strings that the array stores.
*/
......
......@@ -6,11 +6,11 @@
* This SWIG interface extension provides support to
* the pointer manipulation methods present in the standard
* SWIG wrappers, but with support for larger arrays.
*
*
* SWIG provides this in https://github.com/swig/swig/blob/master/Lib/carrays.i
* but the standard methods only provide arrays with up to
* max(int32_t) elements.
*
*
* The `long_array_functions` wrappers extend this
* to arrays of size max(int64_t) instead of max(int32_t).
*/
......@@ -103,7 +103,7 @@ void delete_##NAME(TYPE *ary);
TYPE NAME##_getitem(TYPE *ary, int64_t index);
void NAME##_setitem(TYPE *ary, int64_t index, TYPE value);
%enddef
%enddef
/* Custom template for arrays of pointers */
%define %ptr_array_functions(TYPE,NAME)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment