Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
01e10529
Commit
01e10529
authored
Nov 07, 2016
by
Guolin Ke
Browse files
clean code
parent
e161a746
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
72 additions
and
44 deletions
+72
-44
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+26
-4
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+4
-4
src/application/predictor.hpp
src/application/predictor.hpp
+3
-8
src/boosting/boosting.cpp
src/boosting/boosting.cpp
+1
-1
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+5
-5
src/boosting/gbdt.h
src/boosting/gbdt.h
+24
-6
src/c_api.cpp
src/c_api.cpp
+8
-15
src/network/linker_topo.cpp
src/network/linker_topo.cpp
+1
-1
No files found.
include/LightGBM/boosting.h
View file @
01e10529
...
...
@@ -43,14 +43,36 @@ public:
virtual
void
AddDataset
(
const
Dataset
*
valid_data
,
const
std
::
vector
<
const
Metric
*>&
valid_metrics
)
=
0
;
/*! \brief Training logic */
/*!
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is met or boosting cannot continue
*/
virtual
bool
TrainOneIter
(
const
score_t
*
gradient
,
const
score_t
*
hessian
,
bool
is_eval
)
=
0
;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
virtual
std
::
vector
<
double
>
GetEvalAt
(
int
data_idx
)
const
=
0
;
/*!
* \brief Get current training score
* \param out_len length of returned score
* \return training score
*/
virtual
const
score_t
*
GetTrainingScore
(
data_size_t
*
out_len
)
const
=
0
;
virtual
void
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
=
0
;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param result used to store prediction result; memory should be allocated before calling this function
* \param out_len length of returned score
*/
virtual
void
GetPredictAt
(
int
data_idx
,
score_t
*
result
,
data_size_t
*
out_len
)
const
=
0
;
/*!
* \brief Prediction for one record, not sigmoid transform
...
...
@@ -83,7 +105,7 @@ public:
* \brief Restore from a serialized string
* \param model_str The string of model
*/
virtual
void
Model
s
FromString
(
const
std
::
string
&
model_str
)
=
0
;
virtual
void
Load
ModelFromString
(
const
std
::
string
&
model_str
)
=
0
;
/*!
* \brief Get max feature index of this model
...
...
@@ -107,7 +129,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
virtual
int
NumberOfClass
()
const
=
0
;
virtual
int
NumberOfClass
es
()
const
=
0
;
/*!
* \brief Set number of used model for prediction
...
...
include/LightGBM/c_api.h
View file @
01e10529
...
...
@@ -256,7 +256,7 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result the string containing evaluation statistics
* \param out_result the string containing evaluation statistics
, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterEval
(
BoosterHandle
handle
,
...
...
@@ -281,7 +281,7 @@ this can be used to support customized eval function
* \param handle handle
* \param data 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result used to set a pointer to array
* \param out_result used to set a pointer to array
, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetPredict
(
BoosterHandle
handle
,
...
...
@@ -304,7 +304,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
* \param out_result used to set a pointer to array
, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForCSR
(
BoosterHandle
handle
,
...
...
@@ -332,7 +332,7 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
* 1:with transform(if needed)
* 2:leaf index
* \param n_used_trees number of used tree
* \param out_result used to set a pointer to array
* \param out_result used to set a pointer to array
, should allocate memory before call this function
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterPredictForMat
(
BoosterHandle
handle
,
...
...
src/application/predictor.hpp
View file @
01e10529
...
...
@@ -28,8 +28,7 @@ public:
* \param is_raw_score True if need to predict result with raw score
* \param is_predict_leaf_index True if output leaf index instead of prediction score
*/
Predictor
(
const
Boosting
*
boosting
,
bool
is_raw_score
,
bool
is_predict_leaf_index
)
:
is_raw_score_
(
is_raw_score
),
is_predict_leaf_index_
(
is_predict_leaf_index
)
{
Predictor
(
const
Boosting
*
boosting
,
bool
is_raw_score
,
bool
is_predict_leaf_index
)
{
boosting_
=
boosting
;
num_features_
=
boosting_
->
MaxFeatureIdx
()
+
1
;
#pragma omp parallel
...
...
@@ -42,7 +41,7 @@ public:
features_
[
i
]
=
new
double
[
num_features_
];
}
if
(
is_predict_leaf_index
_
)
{
if
(
is_predict_leaf_index
)
{
predict_fun_
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result for leaf index
...
...
@@ -50,7 +49,7 @@ public:
return
std
::
vector
<
double
>
(
result
.
begin
(),
result
.
end
());
};
}
else
{
if
(
is_raw_score
_
)
{
if
(
is_raw_score
)
{
predict_fun_
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result without sigmoid transformation
...
...
@@ -156,12 +155,8 @@ private:
double
**
features_
;
/*! \brief Number of features */
int
num_features_
;
/*! \brief True if need to predict result with sigmoid transform */
bool
is_raw_score_
;
/*! \brief Number of threads */
int
num_threads_
;
/*! \brief True if output leaf index instead of prediction score */
bool
is_predict_leaf_index_
;
/*! \brief function for prediction */
PredictFunction
predict_fun_
;
};
...
...
src/boosting/boosting.cpp
View file @
01e10529
...
...
@@ -20,7 +20,7 @@ void LoadFileToBoosting(Boosting* boosting, const char* filename) {
for
(
auto
&
line
:
model_reader
.
Lines
())
{
str_buf
<<
line
<<
'\n'
;
}
boosting
->
Model
s
FromString
(
str_buf
.
str
());
boosting
->
Load
ModelFromString
(
str_buf
.
str
());
}
}
...
...
src/boosting/gbdt.cpp
View file @
01e10529
...
...
@@ -238,7 +238,7 @@ bool GBDT::OutputMetric(int iter) {
for
(
auto
&
sub_metric
:
training_metrics_
)
{
auto
name
=
sub_metric
->
GetName
();
auto
scores
=
sub_metric
->
Eval
(
train_score_updater_
->
score
());
for
(
size_t
k
=
0
;
k
<
name
.
size
();
k
++
)
{
for
(
size_t
k
=
0
;
k
<
name
.
size
();
++
k
)
{
Log
::
Info
(
"Iteration: %d, %s : %f"
,
iter
,
name
[
k
].
c_str
(),
scores
[
k
]);
}
}
...
...
@@ -250,7 +250,7 @@ bool GBDT::OutputMetric(int iter) {
auto
test_scores
=
valid_metrics_
[
i
][
j
]
->
Eval
(
valid_score_updater_
[
i
]
->
score
());
if
((
iter
%
gbdt_config_
->
output_freq
)
==
0
)
{
auto
name
=
valid_metrics_
[
i
][
j
]
->
GetName
();
for
(
size_t
k
=
0
;
k
<
name
.
size
();
k
++
)
{
for
(
size_t
k
=
0
;
k
<
name
.
size
();
++
k
)
{
Log
::
Info
(
"Iteration: %d, %s : %f"
,
iter
,
name
[
k
].
c_str
(),
test_scores
[
k
]);
}
}
...
...
@@ -299,7 +299,7 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) const {
return
train_score_updater_
->
score
();
}
void
GBDT
::
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
{
void
GBDT
::
GetPredict
At
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
{
CHECK
(
data_idx
>=
0
&&
data_idx
<=
static_cast
<
int
>
(
valid_metrics_
.
size
()));
std
::
vector
<
double
>
ret
;
...
...
@@ -373,7 +373,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
if
(
!
model_output_file_
.
is_open
())
{
return
;
}
if
(
num_used_model
_
==
NO_LIMIT
)
{
if
(
num_used_model
==
NO_LIMIT
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
else
{
num_used_model
=
num_used_model
*
num_class_
;
...
...
@@ -399,7 +399,7 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
}
}
void
GBDT
::
Model
s
FromString
(
const
std
::
string
&
model_str
)
{
void
GBDT
::
Load
ModelFromString
(
const
std
::
string
&
model_str
)
{
// use serialized string to restore this object
models_
.
clear
();
std
::
vector
<
std
::
string
>
lines
=
Common
::
Split
(
model_str
.
c_str
(),
'\n'
);
...
...
src/boosting/gbdt.h
View file @
01e10529
...
...
@@ -42,17 +42,35 @@ public:
void
AddDataset
(
const
Dataset
*
valid_data
,
const
std
::
vector
<
const
Metric
*>&
valid_metrics
)
override
;
/*!
* \brief one training iteration
* \brief Training logic
* \param gradient nullptr for using default objective, otherwise use self-defined boosting
* \param hessian nullptr for using default objective, otherwise use self-defined boosting
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is met or boosting cannot continue
*/
bool
TrainOneIter
(
const
score_t
*
gradient
,
const
score_t
*
hessian
,
bool
is_eval
)
override
;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \return evaluation result
*/
std
::
vector
<
double
>
GetEvalAt
(
int
data_idx
)
const
override
;
/*! \brief Get prediction result */
/*!
* \brief Get current training score
* \param out_len length of returned score
* \return training score
*/
const
score_t
*
GetTrainingScore
(
data_size_t
*
out_len
)
const
override
;
void
GetPredict
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
override
;
/*!
* \brief Get prediction result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
* \param out_result used to store prediction result; memory should be allocated before calling this function
* \param out_len length of returned score
*/
void
GetPredictAt
(
int
data_idx
,
score_t
*
out_result
,
data_size_t
*
out_len
)
const
override
;
/*!
* \brief Prediction for one record without sigmoid transformation
...
...
@@ -83,7 +101,7 @@ public:
/*!
* \brief Restore from a serialized string
*/
void
Model
s
FromString
(
const
std
::
string
&
model_str
)
override
;
void
Load
ModelFromString
(
const
std
::
string
&
model_str
)
override
;
/*!
* \brief Get max feature index of this model
* \return Max feature index of this model
...
...
@@ -106,7 +124,7 @@ public:
* \brief Get number of classes
* \return Number of classes
*/
inline
int
NumberOfClass
()
const
override
{
return
num_class_
;
}
inline
int
NumberOfClass
es
()
const
override
{
return
num_class_
;
}
/*!
* \brief Set number of used model for prediction
...
...
src/c_api.cpp
View file @
01e10529
...
...
@@ -124,7 +124,7 @@ public:
}
const
Boosting
*
GetBoosting
()
const
{
return
boosting_
;
}
const
inline
int
NumberOfClass
()
const
{
return
boosting_
->
NumberOfClass
();
}
const
inline
int
NumberOfClass
es
()
const
{
return
boosting_
->
NumberOfClass
es
();
}
private:
...
...
@@ -203,10 +203,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
const
size_t
sample_cnt
=
static_cast
<
size_t
>
(
nrow
<
config
.
io_config
.
bin_construct_sample_cnt
?
nrow
:
config
.
io_config
.
bin_construct_sample_cnt
);
auto
sample_indices
=
rand
.
Sample
(
nrow
,
sample_cnt
);
std
::
vector
<
std
::
vector
<
double
>>
sample_values
(
ncol
);
for
(
size_t
i
=
0
;
i
<
sample_indices
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
sample_indices
.
size
();
++
i
)
{
auto
idx
=
sample_indices
[
i
];
auto
row
=
get_row_fun
(
static_cast
<
int
>
(
idx
));
for
(
size_t
j
=
0
;
j
<
row
.
size
();
j
++
)
{
for
(
size_t
j
=
0
;
j
<
row
.
size
();
++
j
)
{
sample_values
[
j
].
push_back
(
row
[
j
]);
}
}
...
...
@@ -477,7 +477,7 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
auto
boosting
=
ref_booster
->
GetBoosting
();
int
len
=
0
;
boosting
->
GetPredict
(
data
,
out_result
,
&
len
);
boosting
->
GetPredict
At
(
data
,
out_result
,
&
len
);
*
out_len
=
static_cast
<
uint64_t
>
(
len
);
return
0
;
}
...
...
@@ -498,13 +498,13 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n_used_trees
),
predict_type
);
auto
get_row_fun
=
Common
::
RowFunctionFromCSR
(
indptr
,
indices
,
data
,
float_type
,
nindptr
,
nelem
);
int
num_class
=
ref_booster
->
NumberOfClass
();
int
num_class
=
ref_booster
->
NumberOfClass
es
();
int
nrow
=
static_cast
<
int
>
(
nindptr
-
1
);
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num_class
;
j
++
)
{
for
(
int
j
=
0
;
j
<
num_class
;
++
j
)
{
out_result
[
i
*
num_class
+
j
]
=
predicton_result
[
j
];
}
}
...
...
@@ -525,25 +525,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
ref_booster
->
PrepareForPrediction
(
static_cast
<
int
>
(
n_used_trees
),
predict_type
);
auto
get_row_fun
=
Common
::
RowPairFunctionFromDenseMatric
(
data
,
nrow
,
ncol
,
float_type
,
is_row_major
);
int
num_class
=
ref_booster
->
NumberOfClass
();
int
num_class
=
ref_booster
->
NumberOfClass
es
();
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
auto
one_row
=
get_row_fun
(
i
);
auto
predicton_result
=
ref_booster
->
Predict
(
one_row
);
for
(
int
j
=
0
;
j
<
num_class
;
j
++
)
{
for
(
int
j
=
0
;
j
<
num_class
;
++
j
)
{
out_result
[
i
*
num_class
+
j
]
=
predicton_result
[
j
];
}
}
return
0
;
}
/*!
* \brief save model into file
* \param handle handle
* \param num_used_model
* \param filename file name
* \return 0 when success, -1 when failure happens
*/
DllExport
int
LGBM_BoosterSaveModel
(
BoosterHandle
handle
,
int
num_used_model
,
const
char
*
filename
)
{
...
...
src/network/linker_topo.cpp
View file @
01e10529
...
...
@@ -27,7 +27,7 @@ BruckMap BruckMap::Construct(int rank, int num_machines) {
// distance at k-th communication, distance[k] = 2^k
std
::
vector
<
int
>
distance
;
int
k
=
0
;
for
(
k
=
0
;
(
1
<<
k
)
<
num_machines
;
k
++
)
{
for
(
k
=
0
;
(
1
<<
k
)
<
num_machines
;
++
k
)
{
distance
.
push_back
(
1
<<
k
);
}
BruckMap
bruckMap
(
k
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment