Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
0a4a7a86
Commit
0a4a7a86
authored
Apr 13, 2019
by
Nikita Titov
Committed by
Guolin Ke
Apr 13, 2019
Browse files
fixed cpplint errors about spaces and newlines (#2102)
parent
32ef7603
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
97 additions
and
84 deletions
+97
-84
helpers/parameter_generator.py
helpers/parameter_generator.py
+1
-1
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+76
-63
include/LightGBM/config.h
include/LightGBM/config.h
+1
-1
include/LightGBM/utils/common.h
include/LightGBM/utils/common.h
+4
-4
src/c_api.cpp
src/c_api.cpp
+2
-2
src/io/config_auto.cpp
src/io/config_auto.cpp
+1
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+12
-12
No files found.
helpers/parameter_generator.py
View file @
0a4a7a86
...
...
@@ -313,7 +313,7 @@ def gen_parameter_code(config_hpp, config_out_cpp):
# tails
str_to_write
+=
" return str_buf.str();
\n
"
str_to_write
+=
"}
\n\n
"
str_to_write
+=
"}
\n
"
str_to_write
+=
"}
// namespace LightGBM
\n
"
with
open
(
config_out_cpp
,
"w"
)
as
config_out_cpp_file
:
config_out_cpp_file
.
write
(
str_to_write
)
...
...
include/LightGBM/c_api.h
View file @
0a4a7a86
...
...
@@ -168,11 +168,11 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSR(const void* indptr,
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_DatasetCreateFromCSRFunc
(
void
*
get_row_funptr
,
int
num_rows
,
int64_t
num_col
,
const
char
*
parameters
,
const
DatasetHandle
reference
,
DatasetHandle
*
out
);
int
num_rows
,
int64_t
num_col
,
const
char
*
parameters
,
const
DatasetHandle
reference
,
DatasetHandle
*
out
);
/*!
...
...
@@ -253,12 +253,11 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMats(int32_t nmat,
* \param out subset of data
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_DatasetGetSubset
(
const
DatasetHandle
handle
,
const
int32_t
*
used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
DatasetHandle
*
out
);
LIGHTGBM_C_EXPORT
int
LGBM_DatasetGetSubset
(
const
DatasetHandle
handle
,
const
int32_t
*
used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
DatasetHandle
*
out
);
/*!
* \brief save feature names to Dataset
...
...
@@ -267,10 +266,9 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetGetSubset(
* \param num_feature_names number of feature names
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_DatasetSetFeatureNames
(
DatasetHandle
handle
,
const
char
**
feature_names
,
int
num_feature_names
);
LIGHTGBM_C_EXPORT
int
LGBM_DatasetSetFeatureNames
(
DatasetHandle
handle
,
const
char
**
feature_names
,
int
num_feature_names
);
/*!
...
...
@@ -280,10 +278,9 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetSetFeatureNames(
* \param num_feature_names number of feature names
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_DatasetGetFeatureNames
(
DatasetHandle
handle
,
char
**
feature_names
,
int
*
num_feature_names
);
LIGHTGBM_C_EXPORT
int
LGBM_DatasetGetFeatureNames
(
DatasetHandle
handle
,
char
**
feature_names
,
int
*
num_feature_names
);
/*!
...
...
@@ -348,7 +345,8 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetGetField(DatasetHandle handle,
* \param handle an instance of data matrix
* \param parameters parameters
*/
LIGHTGBM_C_EXPORT
int
LGBM_DatasetUpdateParam
(
DatasetHandle
handle
,
const
char
*
parameters
);
LIGHTGBM_C_EXPORT
int
LGBM_DatasetUpdateParam
(
DatasetHandle
handle
,
const
char
*
parameters
);
/*!
* \brief get number of data.
...
...
@@ -397,10 +395,9 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterCreate(const DatasetHandle train_data,
* \param out handle of created Booster
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int
*
out_num_iterations
,
BoosterHandle
*
out
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int
*
out_num_iterations
,
BoosterHandle
*
out
);
/*!
* \brief load an existing boosting from string
...
...
@@ -409,10 +406,9 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterCreateFromModelfile(
* \param out handle of created Booster
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterLoadModelFromString
(
const
char
*
model_str
,
int
*
out_num_iterations
,
BoosterHandle
*
out
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterLoadModelFromString
(
const
char
*
model_str
,
int
*
out_num_iterations
,
BoosterHandle
*
out
);
/*!
* \brief free obj in handle
...
...
@@ -424,7 +420,9 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterFree(BoosterHandle handle);
/*!
* \brief Shuffle Models
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterShuffleModels
(
BoosterHandle
handle
,
int
start_iter
,
int
end_iter
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterShuffleModels
(
BoosterHandle
handle
,
int
start_iter
,
int
end_iter
);
/*!
* \brief Merge model in two booster to first handle
...
...
@@ -459,7 +457,8 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterResetTrainingData(BoosterHandle handle,
* \param parameters format: 'key1=value1 key2=value2'
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterResetParameter
(
BoosterHandle
handle
,
const
char
*
parameters
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterResetParameter
(
BoosterHandle
handle
,
const
char
*
parameters
);
/*!
* \brief Get number of class
...
...
@@ -467,7 +466,8 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterResetParameter(BoosterHandle handle, const cha
* \param out_len number of class
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetNumClasses
(
BoosterHandle
handle
,
int
*
out_len
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetNumClasses
(
BoosterHandle
handle
,
int
*
out_len
);
/*!
* \brief update the model in one round
...
...
@@ -475,7 +475,8 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumClasses(BoosterHandle handle, int* out_l
* \param is_finished 1 means finished (cannot split any more)
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterUpdateOneIter
(
BoosterHandle
handle
,
int
*
is_finished
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterUpdateOneIter
(
BoosterHandle
handle
,
int
*
is_finished
);
/*!
* \brief Refit the tree model using the new data (online learning)
...
...
@@ -485,7 +486,10 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_fi
* \param ncol number of columns of leaf_preds
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterRefit
(
BoosterHandle
handle
,
const
int32_t
*
leaf_preds
,
int32_t
nrow
,
int32_t
ncol
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterRefit
(
BoosterHandle
handle
,
const
int32_t
*
leaf_preds
,
int32_t
nrow
,
int32_t
ncol
);
/*!
* \brief update the model by directly specifying the gradient and second order gradient,
...
...
@@ -513,28 +517,32 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterRollbackOneIter(BoosterHandle handle);
* \param out_iteration iteration of boosting rounds
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetCurrentIteration
(
BoosterHandle
handle
,
int
*
out_iteration
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetCurrentIteration
(
BoosterHandle
handle
,
int
*
out_iteration
);
/*!
* \brief Get number of tree per iteration
* \param out_tree_per_iteration number of tree per iteration
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterNumModelPerIteration
(
BoosterHandle
handle
,
int
*
out_tree_per_iteration
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterNumModelPerIteration
(
BoosterHandle
handle
,
int
*
out_tree_per_iteration
);
/*!
* \brief Get number of weak sub-models
* \param out_models number of weak sub-models
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterNumberOfTotalModel
(
BoosterHandle
handle
,
int
*
out_models
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterNumberOfTotalModel
(
BoosterHandle
handle
,
int
*
out_models
);
/*!
* \brief Get number of eval
* \param out_len total number of eval results
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetEvalCounts
(
BoosterHandle
handle
,
int
*
out_len
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetEvalCounts
(
BoosterHandle
handle
,
int
*
out_len
);
/*!
* \brief Get name of eval
...
...
@@ -542,7 +550,9 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_l
* \param out_strs names of eval result, need to pre-allocate memory before call this
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int
*
out_len
,
char
**
out_strs
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int
*
out_len
,
char
**
out_strs
);
/*!
* \brief Get name of features
...
...
@@ -550,14 +560,17 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_le
* \param out_strs names of features, need to pre-allocate memory before call this
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetFeatureNames
(
BoosterHandle
handle
,
int
*
out_len
,
char
**
out_strs
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetFeatureNames
(
BoosterHandle
handle
,
int
*
out_len
,
char
**
out_strs
);
/*!
* \brief Get number of features
* \param out_len total number of features
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetNumFeature
(
BoosterHandle
handle
,
int
*
out_len
);
LIGHTGBM_C_EXPORT
int
LGBM_BoosterGetNumFeature
(
BoosterHandle
handle
,
int
*
out_len
);
/*!
* \brief get evaluation for training data and validation data
...
...
@@ -707,19 +720,19 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterPredictForCSRSingleRow
(
BoosterHandle
handle
,
const
void
*
indptr
,
int
indptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
nindptr
,
int64_t
nelem
,
int64_t
num_col
,
int
predict_type
,
int
num_iteration
,
const
char
*
parameter
,
int64_t
*
out_len
,
double
*
out_result
);
const
void
*
indptr
,
int
indptr_type
,
const
int32_t
*
indices
,
const
void
*
data
,
int
data_type
,
int64_t
nindptr
,
int64_t
nelem
,
int64_t
num_col
,
int
predict_type
,
int
num_iteration
,
const
char
*
parameter
,
int64_t
*
out_len
,
double
*
out_result
);
/*!
...
...
@@ -816,15 +829,15 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
* \param out_result used to set a pointer to array, should allocate memory before call this function
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT
int
LGBM_BoosterPredictForMatSingleRow
(
BoosterHandle
handle
,
const
void
*
data
,
int
data_type
,
int
ncol
,
int
is_row_major
,
int
predict_type
,
int
num_iteration
,
const
char
*
parameter
,
int64_t
*
out_len
,
double
*
out_result
);
const
void
*
data
,
int
data_type
,
int
ncol
,
int
is_row_major
,
int
predict_type
,
int
num_iteration
,
const
char
*
parameter
,
int64_t
*
out_len
,
double
*
out_result
);
/*!
* \brief make prediction for a new data set
...
...
@@ -856,7 +869,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle,
const
char
*
parameter
,
int64_t
*
out_len
,
double
*
out_result
);
/*!
* \brief save model into file
* \param handle handle
...
...
include/LightGBM/config.h
View file @
0a4a7a86
...
...
@@ -399,7 +399,7 @@ struct Config {
// default = 0,0,...,0
// desc = cost-effective gradient boosting penalty for using a feature
// desc = applied once per forest
std
::
vector
<
double
>
cegb_penalty_feature_coupled
;
std
::
vector
<
double
>
cegb_penalty_feature_coupled
;
#pragma endregion
...
...
include/LightGBM/utils/common.h
View file @
0a4a7a86
...
...
@@ -814,20 +814,20 @@ inline static void ObtainMinMaxSum(const T1 *w, int nw, T1 *mi, T1 *ma, T2 *su)
}
}
inline
static
std
::
vector
<
uint32_t
>
EmptyBitset
(
int
n
){
inline
static
std
::
vector
<
uint32_t
>
EmptyBitset
(
int
n
)
{
int
size
=
n
/
32
;
if
(
n
%
32
!=
0
)
size
++
;
if
(
n
%
32
!=
0
)
++
size
;
return
std
::
vector
<
uint32_t
>
(
size
);
}
template
<
typename
T
>
inline
static
void
InsertBitset
(
std
::
vector
<
uint32_t
>&
vec
,
const
T
val
){
inline
static
void
InsertBitset
(
std
::
vector
<
uint32_t
>&
vec
,
const
T
val
)
{
int
i1
=
val
/
32
;
int
i2
=
val
%
32
;
if
(
static_cast
<
int
>
(
vec
.
size
())
<
i1
+
1
)
{
vec
.
resize
(
i1
+
1
,
0
);
}
vec
[
i1
]
|=
(
1
<<
i2
);
vec
[
i1
]
|=
(
1
<<
i2
);
}
template
<
typename
T
>
...
...
src/c_api.cpp
View file @
0a4a7a86
...
...
@@ -220,9 +220,9 @@ class Booster {
is_raw_score
=
false
;
}
// TODO: config could be optimized away... (maybe using lambda callback?)
// TODO
(eisber)
: config could be optimized away... (maybe using lambda callback?)
single_row_predictor_
.
reset
(
new
Predictor
(
boosting_
.
get
(),
num_iteration
,
is_raw_score
,
is_predict_leaf
,
predict_contrib
,
config
.
pred_early_stop
,
config
.
pred_early_stop_freq
,
config
.
pred_early_stop_margin
));
config
.
pred_early_stop
,
config
.
pred_early_stop_freq
,
config
.
pred_early_stop_margin
));
single_row_num_pred_in_one_row_
=
boosting_
->
NumPredictOneRow
(
num_iteration
,
is_predict_leaf
,
predict_contrib
);
single_row_predict_function_
=
single_row_predictor_
->
GetPredictFunction
();
}
...
...
src/io/config_auto.cpp
View file @
0a4a7a86
...
...
@@ -644,4 +644,4 @@ std::string Config::SaveMembersToString() const {
return
str_buf
.
str
();
}
}
}
// namespace LightGBM
src/treelearner/serial_tree_learner.cpp
View file @
0a4a7a86
...
...
@@ -107,10 +107,10 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
feature_used
.
clear
();
feature_used
.
resize
(
train_data
->
num_features
());
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()){
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
())
{
CHECK
(
config_
->
cegb_penalty_feature_coupled
.
size
()
==
static_cast
<
size_t
>
(
train_data_
->
num_total_features
()));
}
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
()){
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
())
{
CHECK
(
config_
->
cegb_penalty_feature_lazy
.
size
()
==
static_cast
<
size_t
>
(
train_data_
->
num_total_features
()));
feature_used_in_data
=
Common
::
EmptyBitset
(
train_data
->
num_features
()
*
num_data_
);
}
...
...
@@ -535,10 +535,10 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
&
smaller_split
);
smaller_split
.
feature
=
real_fidx
;
smaller_split
.
gain
-=
config_
->
cegb_tradeoff
*
config_
->
cegb_penalty_split
*
smaller_leaf_splits_
->
num_data_in_leaf
();
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
feature_index
]){
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
feature_index
])
{
smaller_split
.
gain
-=
config_
->
cegb_tradeoff
*
config_
->
cegb_penalty_feature_coupled
[
real_fidx
];
}
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
()){
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
())
{
smaller_split
.
gain
-=
config_
->
cegb_tradeoff
*
CalculateOndemandCosts
(
real_fidx
,
smaller_leaf_splits_
->
LeafIndex
());
}
splits_per_leaf_
[
smaller_leaf_splits_
->
LeafIndex
()
*
train_data_
->
num_features
()
+
feature_index
]
=
smaller_split
;
...
...
@@ -566,10 +566,10 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
&
larger_split
);
larger_split
.
feature
=
real_fidx
;
larger_split
.
gain
-=
config_
->
cegb_tradeoff
*
config_
->
cegb_penalty_split
*
larger_leaf_splits_
->
num_data_in_leaf
();
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
feature_index
]){
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
feature_index
])
{
larger_split
.
gain
-=
config_
->
cegb_tradeoff
*
config_
->
cegb_penalty_feature_coupled
[
real_fidx
];
}
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
()){
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
())
{
larger_split
.
gain
-=
config_
->
cegb_tradeoff
*
CalculateOndemandCosts
(
real_fidx
,
larger_leaf_splits_
->
LeafIndex
());
}
splits_per_leaf_
[
larger_leaf_splits_
->
LeafIndex
()
*
train_data_
->
num_features
()
+
feature_index
]
=
larger_split
;
...
...
@@ -757,18 +757,18 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int*
void
SerialTreeLearner
::
Split
(
Tree
*
tree
,
int
best_leaf
,
int
*
left_leaf
,
int
*
right_leaf
)
{
const
SplitInfo
&
best_split_info
=
best_split_per_leaf_
[
best_leaf
];
const
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
best_split_info
.
feature
);
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
inner_feature_index
]){
if
(
!
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
!
feature_used
[
inner_feature_index
])
{
feature_used
[
inner_feature_index
]
=
true
;
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
){
if
(
i
==
best_leaf
)
continue
;
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
if
(
i
==
best_leaf
)
continue
;
auto
split
=
&
splits_per_leaf_
[
i
*
train_data_
->
num_features
()
+
inner_feature_index
];
split
->
gain
+=
config_
->
cegb_tradeoff
*
config_
->
cegb_penalty_feature_coupled
[
best_split_info
.
feature
];
if
(
*
split
>
best_split_per_leaf_
[
i
])
best_split_per_leaf_
[
i
]
=
*
split
;
if
(
*
split
>
best_split_per_leaf_
[
i
])
best_split_per_leaf_
[
i
]
=
*
split
;
}
}
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
()){
if
(
!
config_
->
cegb_penalty_feature_lazy
.
empty
())
{
data_size_t
cnt_leaf_data
=
0
;
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
best_leaf
,
&
cnt_leaf_data
);
for
(
data_size_t
i_input
=
0
;
i_input
<
cnt_leaf_data
;
++
i_input
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment