Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
5a80b788
Unverified
Commit
5a80b788
authored
Mar 02, 2020
by
Nikita Titov
Committed by
GitHub
Mar 02, 2020
Browse files
introduced specific CHECKs (#2849)
parent
8d90bbe3
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
59 additions
and
35 deletions
+59
-35
include/LightGBM/tree.h
include/LightGBM/tree.h
+1
-1
include/LightGBM/utils/log.h
include/LightGBM/utils/log.h
+24
-0
src/application/predictor.hpp
src/application/predictor.hpp
+2
-2
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+3
-3
src/boosting/gbdt_model_text.cpp
src/boosting/gbdt_model_text.cpp
+2
-2
src/c_api.cpp
src/c_api.cpp
+1
-1
src/io/bin.cpp
src/io/bin.cpp
+2
-2
src/io/dataset.cpp
src/io/dataset.cpp
+5
-5
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+7
-7
src/metric/dcg_calculator.cpp
src/metric/dcg_calculator.cpp
+1
-1
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+2
-2
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+1
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+8
-8
No files found.
include/LightGBM/tree.h
View file @
5a80b788
...
@@ -513,7 +513,7 @@ inline void Tree::PredictContrib(const double* feature_values, int num_features,
...
@@ -513,7 +513,7 @@ inline void Tree::PredictContrib(const double* feature_values, int num_features,
output
[
num_features
]
+=
ExpectedValue
();
output
[
num_features
]
+=
ExpectedValue
();
// Run the recursion with preallocated space for the unique path data
// Run the recursion with preallocated space for the unique path data
if
(
num_leaves_
>
1
)
{
if
(
num_leaves_
>
1
)
{
CHECK
(
max_depth_
>=
0
);
CHECK
_GE
(
max_depth_
,
0
);
const
int
max_path_len
=
max_depth_
+
1
;
const
int
max_path_len
=
max_depth_
+
1
;
std
::
vector
<
PathElement
>
unique_path_data
(
max_path_len
*
(
max_path_len
+
1
)
/
2
);
std
::
vector
<
PathElement
>
unique_path_data
(
max_path_len
*
(
max_path_len
+
1
)
/
2
);
TreeSHAP
(
feature_values
,
output
,
0
,
0
,
unique_path_data
.
data
(),
1
,
1
,
-
1
);
TreeSHAP
(
feature_values
,
output
,
0
,
0
,
unique_path_data
.
data
(),
1
,
1
,
-
1
);
...
...
include/LightGBM/utils/log.h
View file @
5a80b788
...
@@ -28,6 +28,30 @@ namespace LightGBM {
...
@@ -28,6 +28,30 @@ namespace LightGBM {
" at %s, line %d .\n", __FILE__, __LINE__);
" at %s, line %d .\n", __FILE__, __LINE__);
#endif
#endif
#ifndef CHECK_EQ
#define CHECK_EQ(a, b) CHECK((a) == (b))
#endif
#ifndef CHECK_NE
#define CHECK_NE(a, b) CHECK((a) != (b))
#endif
#ifndef CHECK_GE
#define CHECK_GE(a, b) CHECK((a) >= (b))
#endif
#ifndef CHECK_LE
#define CHECK_LE(a, b) CHECK((a) <= (b))
#endif
#ifndef CHECK_GT
#define CHECK_GT(a, b) CHECK((a) > (b))
#endif
#ifndef CHECK_LT
#define CHECK_LT(a, b) CHECK((a) < (b))
#endif
#ifndef CHECK_NOTNULL
#ifndef CHECK_NOTNULL
#define CHECK_NOTNULL(pointer) \
#define CHECK_NOTNULL(pointer) \
if ((pointer) == nullptr) LightGBM::Log::Fatal(#pointer " Can't be NULL at %s, line %d .\n", __FILE__, __LINE__);
if ((pointer) == nullptr) LightGBM::Log::Fatal(#pointer " Can't be NULL at %s, line %d .\n", __FILE__, __LINE__);
...
...
src/application/predictor.hpp
View file @
5a80b788
...
@@ -43,8 +43,8 @@ class Predictor {
...
@@ -43,8 +43,8 @@ class Predictor {
"none"
,
LightGBM
::
PredictionEarlyStopConfig
());
"none"
,
LightGBM
::
PredictionEarlyStopConfig
());
if
(
early_stop
&&
!
boosting
->
NeedAccuratePrediction
())
{
if
(
early_stop
&&
!
boosting
->
NeedAccuratePrediction
())
{
PredictionEarlyStopConfig
pred_early_stop_config
;
PredictionEarlyStopConfig
pred_early_stop_config
;
CHECK
(
early_stop_freq
>
0
);
CHECK
_GT
(
early_stop_freq
,
0
);
CHECK
(
early_stop_margin
>=
0
);
CHECK
_GE
(
early_stop_margin
,
0
);
pred_early_stop_config
.
margin_threshold
=
early_stop_margin
;
pred_early_stop_config
.
margin_threshold
=
early_stop_margin
;
pred_early_stop_config
.
round_period
=
early_stop_freq
;
pred_early_stop_config
.
round_period
=
early_stop_freq
;
if
(
boosting
->
NumberOfClasses
()
==
1
)
{
if
(
boosting
->
NumberOfClasses
()
==
1
)
{
...
...
src/boosting/gbdt.cpp
View file @
5a80b788
...
@@ -264,9 +264,9 @@ void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
...
@@ -264,9 +264,9 @@ void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
}
}
void
GBDT
::
RefitTree
(
const
std
::
vector
<
std
::
vector
<
int
>>&
tree_leaf_prediction
)
{
void
GBDT
::
RefitTree
(
const
std
::
vector
<
std
::
vector
<
int
>>&
tree_leaf_prediction
)
{
CHECK
(
tree_leaf_prediction
.
size
()
>
0
);
CHECK
_GT
(
tree_leaf_prediction
.
size
()
,
0
);
CHECK
(
static_cast
<
size_t
>
(
num_data_
)
==
tree_leaf_prediction
.
size
());
CHECK
_EQ
(
static_cast
<
size_t
>
(
num_data_
)
,
tree_leaf_prediction
.
size
());
CHECK
(
static_cast
<
size_t
>
(
models_
.
size
())
==
tree_leaf_prediction
[
0
].
size
());
CHECK
_EQ
(
static_cast
<
size_t
>
(
models_
.
size
())
,
tree_leaf_prediction
[
0
].
size
());
int
num_iterations
=
static_cast
<
int
>
(
models_
.
size
()
/
num_tree_per_iteration_
);
int
num_iterations
=
static_cast
<
int
>
(
models_
.
size
()
/
num_tree_per_iteration_
);
std
::
vector
<
int
>
leaf_pred
(
num_data_
);
std
::
vector
<
int
>
leaf_pred
(
num_data_
);
for
(
int
iter
=
0
;
iter
<
num_iterations
;
++
iter
)
{
for
(
int
iter
=
0
;
iter
<
num_iterations
;
++
iter
)
{
...
...
src/boosting/gbdt_model_text.cpp
View file @
5a80b788
...
@@ -593,7 +593,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
...
@@ -593,7 +593,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
for
(
int
split_idx
=
0
;
split_idx
<
models_
[
iter
]
->
num_leaves
()
-
1
;
++
split_idx
)
{
for
(
int
split_idx
=
0
;
split_idx
<
models_
[
iter
]
->
num_leaves
()
-
1
;
++
split_idx
)
{
if
(
models_
[
iter
]
->
split_gain
(
split_idx
)
>
0
)
{
if
(
models_
[
iter
]
->
split_gain
(
split_idx
)
>
0
)
{
#ifdef DEBUG
#ifdef DEBUG
CHECK
(
models_
[
iter
]
->
split_feature
(
split_idx
)
>=
0
);
CHECK
_GE
(
models_
[
iter
]
->
split_feature
(
split_idx
)
,
0
);
#endif
#endif
feature_importances
[
models_
[
iter
]
->
split_feature
(
split_idx
)]
+=
1.0
;
feature_importances
[
models_
[
iter
]
->
split_feature
(
split_idx
)]
+=
1.0
;
}
}
...
@@ -604,7 +604,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
...
@@ -604,7 +604,7 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
for
(
int
split_idx
=
0
;
split_idx
<
models_
[
iter
]
->
num_leaves
()
-
1
;
++
split_idx
)
{
for
(
int
split_idx
=
0
;
split_idx
<
models_
[
iter
]
->
num_leaves
()
-
1
;
++
split_idx
)
{
if
(
models_
[
iter
]
->
split_gain
(
split_idx
)
>
0
)
{
if
(
models_
[
iter
]
->
split_gain
(
split_idx
)
>
0
)
{
#ifdef DEBUG
#ifdef DEBUG
CHECK
(
models_
[
iter
]
->
split_feature
(
split_idx
)
>=
0
);
CHECK
_GE
(
models_
[
iter
]
->
split_feature
(
split_idx
)
,
0
);
#endif
#endif
feature_importances
[
models_
[
iter
]
->
split_feature
(
split_idx
)]
+=
models_
[
iter
]
->
split_gain
(
split_idx
);
feature_importances
[
models_
[
iter
]
->
split_feature
(
split_idx
)]
+=
models_
[
iter
]
->
split_gain
(
split_idx
);
}
}
...
...
src/c_api.cpp
View file @
5a80b788
...
@@ -1050,7 +1050,7 @@ int LGBM_DatasetGetSubset(
...
@@ -1050,7 +1050,7 @@ int LGBM_DatasetGetSubset(
omp_set_num_threads
(
config
.
num_threads
);
omp_set_num_threads
(
config
.
num_threads
);
}
}
auto
full_dataset
=
reinterpret_cast
<
const
Dataset
*>
(
handle
);
auto
full_dataset
=
reinterpret_cast
<
const
Dataset
*>
(
handle
);
CHECK
(
num_used_row_indices
>
0
);
CHECK
_GT
(
num_used_row_indices
,
0
);
const
int32_t
lower
=
0
;
const
int32_t
lower
=
0
;
const
int32_t
upper
=
full_dataset
->
num_data
()
-
1
;
const
int32_t
upper
=
full_dataset
->
num_data
()
-
1
;
Common
::
CheckElementsIntervalClosed
(
used_row_indices
,
lower
,
upper
,
num_used_row_indices
,
"Used indices of subset"
);
Common
::
CheckElementsIntervalClosed
(
used_row_indices
,
lower
,
upper
,
num_used_row_indices
,
"Used indices of subset"
);
...
...
src/io/bin.cpp
View file @
5a80b788
...
@@ -80,7 +80,7 @@ namespace LightGBM {
...
@@ -80,7 +80,7 @@ namespace LightGBM {
int
num_distinct_values
,
int
max_bin
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_cnt
,
int
min_data_in_bin
)
{
size_t
total_cnt
,
int
min_data_in_bin
)
{
std
::
vector
<
double
>
bin_upper_bound
;
std
::
vector
<
double
>
bin_upper_bound
;
CHECK
(
max_bin
>
0
);
CHECK
_GT
(
max_bin
,
0
);
if
(
num_distinct_values
<=
max_bin
)
{
if
(
num_distinct_values
<=
max_bin
)
{
bin_upper_bound
.
clear
();
bin_upper_bound
.
clear
();
int
cur_cnt_inbin
=
0
;
int
cur_cnt_inbin
=
0
;
...
@@ -514,7 +514,7 @@ namespace LightGBM {
...
@@ -514,7 +514,7 @@ namespace LightGBM {
static_cast
<
uint32_t
>
(
ArrayArgs
<
int
>::
ArgMax
(
cnt_in_bin
));
static_cast
<
uint32_t
>
(
ArrayArgs
<
int
>::
ArgMax
(
cnt_in_bin
));
if
(
bin_type_
==
BinType
::
CategoricalBin
)
{
if
(
bin_type_
==
BinType
::
CategoricalBin
)
{
if
(
most_freq_bin_
==
0
)
{
if
(
most_freq_bin_
==
0
)
{
CHECK
(
num_bin_
>
1
);
CHECK
_GT
(
num_bin_
,
1
);
// FIXME: how to enable `most_freq_bin_ = 0` for categorical features
// FIXME: how to enable `most_freq_bin_ = 0` for categorical features
most_freq_bin_
=
1
;
most_freq_bin_
=
1
;
}
}
...
...
src/io/dataset.cpp
View file @
5a80b788
...
@@ -28,7 +28,7 @@ Dataset::Dataset() {
...
@@ -28,7 +28,7 @@ Dataset::Dataset() {
}
}
Dataset
::
Dataset
(
data_size_t
num_data
)
{
Dataset
::
Dataset
(
data_size_t
num_data
)
{
CHECK
(
num_data
>
0
);
CHECK
_GT
(
num_data
,
0
);
data_filename_
=
"noname"
;
data_filename_
=
"noname"
;
num_data_
=
num_data
;
num_data_
=
num_data
;
metadata_
.
Init
(
num_data_
,
NO_SPECIFIC
,
NO_SPECIFIC
);
metadata_
.
Init
(
num_data_
,
NO_SPECIFIC
,
NO_SPECIFIC
);
...
@@ -403,10 +403,10 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
...
@@ -403,10 +403,10 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
}
}
}
}
if
(
!
io_config
.
max_bin_by_feature
.
empty
())
{
if
(
!
io_config
.
max_bin_by_feature
.
empty
())
{
CHECK
(
static_cast
<
size_t
>
(
num_total_features_
)
==
CHECK
_EQ
(
static_cast
<
size_t
>
(
num_total_features_
)
,
io_config
.
max_bin_by_feature
.
size
());
io_config
.
max_bin_by_feature
.
size
());
CHECK
(
*
(
std
::
min_element
(
io_config
.
max_bin_by_feature
.
begin
(),
CHECK
_GT
(
*
(
std
::
min_element
(
io_config
.
max_bin_by_feature
.
begin
(),
io_config
.
max_bin_by_feature
.
end
()))
>
1
);
io_config
.
max_bin_by_feature
.
end
()))
,
1
);
max_bin_by_feature_
.
resize
(
num_total_features_
);
max_bin_by_feature_
.
resize
(
num_total_features_
);
max_bin_by_feature_
.
assign
(
io_config
.
max_bin_by_feature
.
begin
(),
max_bin_by_feature_
.
assign
(
io_config
.
max_bin_by_feature
.
begin
(),
io_config
.
max_bin_by_feature
.
end
());
io_config
.
max_bin_by_feature
.
end
());
...
...
src/io/dataset_loader.cpp
View file @
5a80b788
...
@@ -390,8 +390,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
...
@@ -390,8 +390,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
mem_ptr
+=
sizeof
(
int
)
*
(
dataset
->
num_groups_
);
mem_ptr
+=
sizeof
(
int
)
*
(
dataset
->
num_groups_
);
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
CHECK
(
static_cast
<
size_t
>
(
dataset
->
num_total_features_
)
==
config_
.
max_bin_by_feature
.
size
());
CHECK
_EQ
(
static_cast
<
size_t
>
(
dataset
->
num_total_features_
)
,
config_
.
max_bin_by_feature
.
size
());
CHECK
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
>
1
);
CHECK
_GT
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
,
1
);
dataset
->
max_bin_by_feature_
.
resize
(
dataset
->
num_total_features_
);
dataset
->
max_bin_by_feature_
.
resize
(
dataset
->
num_total_features_
);
dataset
->
max_bin_by_feature_
.
assign
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
());
dataset
->
max_bin_by_feature_
.
assign
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
());
}
else
{
}
else
{
...
@@ -542,8 +542,8 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
...
@@ -542,8 +542,8 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
}
}
}
}
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
CHECK
(
static_cast
<
size_t
>
(
num_col
)
==
config_
.
max_bin_by_feature
.
size
());
CHECK
_EQ
(
static_cast
<
size_t
>
(
num_col
)
,
config_
.
max_bin_by_feature
.
size
());
CHECK
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
>
1
);
CHECK
_GT
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
,
1
);
}
}
// get forced split
// get forced split
...
@@ -850,12 +850,12 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
...
@@ -850,12 +850,12 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
dataset
->
num_total_features_
=
Network
::
GlobalSyncUpByMax
(
dataset
->
num_total_features_
);
dataset
->
num_total_features_
=
Network
::
GlobalSyncUpByMax
(
dataset
->
num_total_features_
);
}
}
if
(
!
feature_names_
.
empty
())
{
if
(
!
feature_names_
.
empty
())
{
CHECK
(
dataset
->
num_total_features_
==
static_cast
<
int
>
(
feature_names_
.
size
()));
CHECK
_EQ
(
dataset
->
num_total_features_
,
static_cast
<
int
>
(
feature_names_
.
size
()));
}
}
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
if
(
!
config_
.
max_bin_by_feature
.
empty
())
{
CHECK
(
static_cast
<
size_t
>
(
dataset
->
num_total_features_
)
==
config_
.
max_bin_by_feature
.
size
());
CHECK
_EQ
(
static_cast
<
size_t
>
(
dataset
->
num_total_features_
)
,
config_
.
max_bin_by_feature
.
size
());
CHECK
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
>
1
);
CHECK
_GT
(
*
(
std
::
min_element
(
config_
.
max_bin_by_feature
.
begin
(),
config_
.
max_bin_by_feature
.
end
()))
,
1
);
}
}
// get forced split
// get forced split
...
...
src/metric/dcg_calculator.cpp
View file @
5a80b788
...
@@ -25,7 +25,7 @@ void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
...
@@ -25,7 +25,7 @@ void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
}
}
}
else
{
}
else
{
for
(
size_t
i
=
0
;
i
<
eval_at
->
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
eval_at
->
size
();
++
i
)
{
CHECK
(
ref_eval_at
[
i
]
>
0
);
CHECK
_GT
(
ref_eval_at
[
i
]
,
0
);
}
}
}
}
}
}
...
...
src/metric/regression_metric.hpp
View file @
5a80b788
...
@@ -271,7 +271,7 @@ class GammaMetric : public RegressionMetric<GammaMetric> {
...
@@ -271,7 +271,7 @@ class GammaMetric : public RegressionMetric<GammaMetric> {
}
}
inline
static
void
CheckLabel
(
label_t
label
)
{
inline
static
void
CheckLabel
(
label_t
label
)
{
CHECK
(
label
>
0
);
CHECK
_GT
(
label
,
0
);
}
}
};
};
...
@@ -293,7 +293,7 @@ class GammaDevianceMetric : public RegressionMetric<GammaDevianceMetric> {
...
@@ -293,7 +293,7 @@ class GammaDevianceMetric : public RegressionMetric<GammaDevianceMetric> {
return
sum_loss
*
2
;
return
sum_loss
*
2
;
}
}
inline
static
void
CheckLabel
(
label_t
label
)
{
inline
static
void
CheckLabel
(
label_t
label
)
{
CHECK
(
label
>
0
);
CHECK
_GT
(
label
,
0
);
}
}
};
};
...
...
src/treelearner/feature_histogram.hpp
View file @
5a80b788
...
@@ -743,7 +743,7 @@ class HistogramPool {
...
@@ -743,7 +743,7 @@ class HistogramPool {
void
Reset
(
int
cache_size
,
int
total_size
)
{
void
Reset
(
int
cache_size
,
int
total_size
)
{
cache_size_
=
cache_size
;
cache_size_
=
cache_size
;
// at least need 2 bucket to store smaller leaf and larger leaf
// at least need 2 bucket to store smaller leaf and larger leaf
CHECK
(
cache_size_
>=
2
);
CHECK
_GE
(
cache_size_
,
2
);
total_size_
=
total_size
;
total_size_
=
total_size
;
if
(
cache_size_
>
total_size_
)
{
if
(
cache_size_
>
total_size_
)
{
cache_size_
=
total_size_
;
cache_size_
=
total_size_
;
...
...
src/treelearner/serial_tree_learner.cpp
View file @
5a80b788
...
@@ -95,7 +95,7 @@ void SerialTreeLearner::GetMultiValBin(const Dataset* dataset, bool is_first_tim
...
@@ -95,7 +95,7 @@ void SerialTreeLearner::GetMultiValBin(const Dataset* dataset, bool is_first_tim
void
SerialTreeLearner
::
ResetTrainingData
(
const
Dataset
*
train_data
)
{
void
SerialTreeLearner
::
ResetTrainingData
(
const
Dataset
*
train_data
)
{
train_data_
=
train_data
;
train_data_
=
train_data
;
num_data_
=
train_data_
->
num_data
();
num_data_
=
train_data_
->
num_data
();
CHECK
(
num_features_
==
train_data_
->
num_features
());
CHECK
_EQ
(
num_features_
,
train_data_
->
num_features
());
// initialize splits for leaf
// initialize splits for leaf
smaller_leaf_splits_
->
ResetNumData
(
num_data_
);
smaller_leaf_splits_
->
ResetNumData
(
num_data_
);
...
@@ -247,7 +247,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
...
@@ -247,7 +247,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
i
]];
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
i
]];
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
CHECK
(
inner_feature_index
>=
0
);
CHECK
_GE
(
inner_feature_index
,
0
);
ret
[
inner_feature_index
]
=
1
;
ret
[
inner_feature_index
]
=
1
;
}
}
}
else
if
(
used_feature_indices_
.
size
()
<=
0
)
{
}
else
if
(
used_feature_indices_
.
size
()
<=
0
)
{
...
@@ -259,7 +259,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
...
@@ -259,7 +259,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
valid_feature_indices_
[
sampled_indices
[
i
]];
int
used_feature
=
valid_feature_indices_
[
sampled_indices
[
i
]];
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
CHECK
(
inner_feature_index
>=
0
);
CHECK
_GE
(
inner_feature_index
,
0
);
ret
[
inner_feature_index
]
=
1
;
ret
[
inner_feature_index
]
=
1
;
}
}
}
else
{
}
else
{
...
@@ -271,7 +271,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
...
@@ -271,7 +271,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
sampled_indices
[
i
]]];
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
sampled_indices
[
i
]]];
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
CHECK
(
inner_feature_index
>=
0
);
CHECK
_GE
(
inner_feature_index
,
0
);
ret
[
inner_feature_index
]
=
1
;
ret
[
inner_feature_index
]
=
1
;
}
}
}
}
...
@@ -706,11 +706,11 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri
...
@@ -706,11 +706,11 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri
// init the leaves that used on next iteration
// init the leaves that used on next iteration
if
(
best_split_info
.
left_count
<
best_split_info
.
right_count
)
{
if
(
best_split_info
.
left_count
<
best_split_info
.
right_count
)
{
CHECK
(
best_split_info
.
left_count
>
0
);
CHECK
_GT
(
best_split_info
.
left_count
,
0
);
smaller_leaf_splits_
->
Init
(
*
left_leaf
,
data_partition_
.
get
(),
best_split_info
.
left_sum_gradient
,
best_split_info
.
left_sum_hessian
);
smaller_leaf_splits_
->
Init
(
*
left_leaf
,
data_partition_
.
get
(),
best_split_info
.
left_sum_gradient
,
best_split_info
.
left_sum_hessian
);
larger_leaf_splits_
->
Init
(
*
right_leaf
,
data_partition_
.
get
(),
best_split_info
.
right_sum_gradient
,
best_split_info
.
right_sum_hessian
);
larger_leaf_splits_
->
Init
(
*
right_leaf
,
data_partition_
.
get
(),
best_split_info
.
right_sum_gradient
,
best_split_info
.
right_sum_hessian
);
}
else
{
}
else
{
CHECK
(
best_split_info
.
right_count
>
0
);
CHECK
_GT
(
best_split_info
.
right_count
,
0
);
smaller_leaf_splits_
->
Init
(
*
right_leaf
,
data_partition_
.
get
(),
best_split_info
.
right_sum_gradient
,
best_split_info
.
right_sum_hessian
);
smaller_leaf_splits_
->
Init
(
*
right_leaf
,
data_partition_
.
get
(),
best_split_info
.
right_sum_gradient
,
best_split_info
.
right_sum_hessian
);
larger_leaf_splits_
->
Init
(
*
left_leaf
,
data_partition_
.
get
(),
best_split_info
.
left_sum_gradient
,
best_split_info
.
left_sum_hessian
);
larger_leaf_splits_
->
Init
(
*
left_leaf
,
data_partition_
.
get
(),
best_split_info
.
left_sum_gradient
,
best_split_info
.
left_sum_hessian
);
}
}
...
@@ -727,7 +727,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
...
@@ -727,7 +727,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
CHECK
(
tree
->
num_leaves
()
<=
data_partition_
->
num_leaves
());
CHECK
(
tree
->
num_leaves
()
<=
data_partition_
->
num_leaves
());
const
data_size_t
*
bag_mapper
=
nullptr
;
const
data_size_t
*
bag_mapper
=
nullptr
;
if
(
total_num_data
!=
num_data_
)
{
if
(
total_num_data
!=
num_data_
)
{
CHECK
(
bag_cnt
==
num_data_
);
CHECK
_EQ
(
bag_cnt
,
num_data_
);
bag_mapper
=
bag_indices
;
bag_mapper
=
bag_indices
;
}
}
std
::
vector
<
int
>
n_nozeroworker_perleaf
(
tree
->
num_leaves
(),
1
);
std
::
vector
<
int
>
n_nozeroworker_perleaf
(
tree
->
num_leaves
(),
1
);
...
@@ -742,7 +742,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
...
@@ -742,7 +742,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
const
double
new_output
=
obj
->
RenewTreeOutput
(
output
,
residual_getter
,
index_mapper
,
bag_mapper
,
cnt_leaf_data
);
const
double
new_output
=
obj
->
RenewTreeOutput
(
output
,
residual_getter
,
index_mapper
,
bag_mapper
,
cnt_leaf_data
);
tree
->
SetLeafOutput
(
i
,
new_output
);
tree
->
SetLeafOutput
(
i
,
new_output
);
}
else
{
}
else
{
CHECK
(
num_machines
>
1
);
CHECK
_GT
(
num_machines
,
1
);
tree
->
SetLeafOutput
(
i
,
0.0
);
tree
->
SetLeafOutput
(
i
,
0.0
);
n_nozeroworker_perleaf
[
i
]
=
0
;
n_nozeroworker_perleaf
[
i
]
=
0
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment