tianlh / LightGBM-DCU / Commits / 0b9fe27a

Commit 0b9fe27a authored Oct 25, 2016 by Hui Xue

t push origin masterMerge branch 'xuehui1991-update_for_dcg'

merge to master.

Parents: 0dcd422a, bb05a06f
Showing 20 changed files with 43 additions and 44 deletions (+43 -44)
include/LightGBM/bin.h                      +3 -3
include/LightGBM/boosting.h                 +2 -2
include/LightGBM/config.h                   +5 -5
include/LightGBM/dataset.h                  +7 -7
include/LightGBM/network.h                  +1 -1
src/application/application.cpp             +1 -1
src/boosting/gbdt.cpp                       +0 -1
src/boosting/gbdt.h                         +3 -3
src/boosting/score_updater.hpp              +2 -2
src/io/dataset.cpp                          +4 -4
src/io/dense_bin.hpp                        +1 -1
src/io/ordered_sparse_bin.hpp               +1 -1
src/metric/binary_metric.hpp                +1 -1
src/metric/dcg_calculator.cpp               +1 -1
src/metric/regression_metric.hpp            +1 -1
src/network/network.cpp                     +2 -2
src/objective/rank_objective.hpp            +1 -1
src/treelearner/feature_histogram.hpp       +3 -3
src/treelearner/leaf_splits.hpp             +2 -2
src/treelearner/parallel_tree_learner.h     +2 -2
include/LightGBM/bin.h

@@ -119,7 +119,7 @@ private:
 };
 /*!
-* \brief Interface for ordered bin data. It very efficient for construct histogram, especially for sparse bin
+* \brief Interface for ordered bin data. It's very efficient for constructing histogram, especially for sparse bin
 * There are 2 advantages for using ordered bin.
 * 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram consturction for sparse feature.

@@ -253,7 +253,7 @@ public:
   virtual OrderedBin* CreateOrderedBin() const = 0;
   /*!
-  * \brief After pushed all feature data, should call this to have better refactor for bin data
+  * \brief After pushed all feature data, call this could have better refactor for bin data
   */
   virtual void FinishLoad() = 0;

@@ -261,7 +261,7 @@ public:
   * \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
   * \param num_data Total number of data
   * \param num_bin Number of bin
-  * \param is_sparse True if this feature is saprese
+  * \param is_sparse True if this feature is sparse
   * \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
   * \param is_enable_sparse True if enable sparse feature
   * \param is_sparse Will set to true if this bin is sparse
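The two advantages listed in this comment, grouping the data by leaf and storing only non-zero bins, are what make ordered bins cheap to turn into histograms. As a rough illustration only (this is not the actual OrderedBin interface; HistogramEntry, LeafEntries and ConstructHistogramForLeaf are made-up names), a per-leaf pass over only the non-zero entries might look like this:

// Simplified sketch: entries for one leaf are contiguous and only non-zero
// bins are stored, so building the histogram touches a small, cache-friendly
// slice instead of scanning every row for every feature.
#include <cstdint>
#include <utility>
#include <vector>

struct HistogramEntry { double sum_gradient = 0.0; double sum_hessian = 0.0; };

// (data index, bin value) pairs for the non-zero bins of one leaf.
using LeafEntries = std::vector<std::pair<int, uint8_t>>;

void ConstructHistogramForLeaf(const LeafEntries& entries_of_leaf,
                               const std::vector<double>& gradients,
                               const std::vector<double>& hessians,
                               std::vector<HistogramEntry>* hist) {
  for (const auto& e : entries_of_leaf) {   // visit non-zero bins only
    HistogramEntry& slot = (*hist)[e.second];
    slot.sum_gradient += gradients[e.first];
    slot.sum_hessian += hessians[e.first];
  }
}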
include/LightGBM/boosting.h

@@ -47,14 +47,14 @@ public:
   virtual void Train() = 0;
   /*!
-  * \brief Predtion for one record, not sigmoid transform
+  * \brief Prediction for one record, not sigmoid transform
   * \param feature_values Feature value on this record
   * \return Prediction result for this record
   */
   virtual double PredictRaw(const double* feature_values) const = 0;
   /*!
-  * \brief Predtion for one record, will use sigmoid transform if needed
+  * \brief Prediction for one record, will use sigmoid transform if needed
   * \param feature_values Feature value on this record
   * \return Prediction result for this record
   */
include/LightGBM/config.h

@@ -20,7 +20,7 @@ public:
   virtual ~ConfigBase() {}
   /*!
-  * \brief Set LabelAt current config object by params
+  * \brief Set current config object by params
   * \param params Store the key and value for params
   */
   virtual void Set(

@@ -30,7 +30,7 @@ public:
   * \brief Get string value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetString(

@@ -41,7 +41,7 @@ public:
   * \brief Get int value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetInt(

@@ -52,7 +52,7 @@ public:
   * \brief Get double value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetDouble(

@@ -63,7 +63,7 @@ public:
   * \brief Get bool value by specific name of key
   * \param params Store the key and value for params
   * \param name Name of key
-  * \param out Value will asign to out if key exists
+  * \param out Value will assign to out if key exists
   * \return True if key exists
   */
   inline bool GetBool(
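All four getters documented above share one contract: look the key up in the params map, assign to *out only if it exists, and return whether it did. A minimal sketch of that contract (GetIntSketch is a hypothetical helper, not the LightGBM implementation, which also has its own parsing and error handling):

#include <string>
#include <unordered_map>

// Sketch of the documented behaviour: *out is assigned only when the key
// exists, and the return value reports whether it existed.
inline bool GetIntSketch(const std::unordered_map<std::string, std::string>& params,
                         const std::string& name, int* out) {
  auto it = params.find(name);
  if (it == params.end()) return false;  // key missing: leave *out untouched
  *out = std::stoi(it->second);          // key present: assign and report success
  return true;
}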
include/LightGBM/dataset.h

@@ -17,7 +17,7 @@ namespace LightGBM {
 class Feature;
 /*!
-* \brief This class is used to store some meta(non-feature) data for tranining data,
+* \brief This class is used to store some meta(non-feature) data for training data,
 * e.g. labels, weights, initial scores, qurey level informations.
 *
 * Some details:

@@ -110,14 +110,14 @@ public:
   }
   /*!
-  * \brief Get weights, if not exists, will return nullput
+  * \brief Get weights, if not exists, will return nullptr
   * \return Pointer of weights
   */
   inline const float* weights() const { return weights_; }
   /*!
-  * \brief Get data boundaries on queries, if not exists, will return nullput
+  * \brief Get data boundaries on queries, if not exists, will return nullptr
   * we assume data will order by query,
   * the interval of [query_boundaris[i], query_boundaris[i+1])
   * is the data indices for query i.

@@ -133,13 +133,13 @@ public:
   inline const data_size_t num_queries() const { return num_queries_; }
   /*!
-  * \brief Get weights for queries, if not exists, will return nullput
+  * \brief Get weights for queries, if not exists, will return nullptr
   * \return Pointer of weights for queries
   */
   inline const float* query_weights() const { return query_weights_; }
   /*!
-  * \brief Get initial scores, if not exists, will return nullput
+  * \brief Get initial scores, if not exists, will return nullptr
   * \return Pointer of initial scores
   */
   inline const score_t* init_score() const { return init_score_; }

@@ -231,7 +231,7 @@ public:
   * \param max_bin The maximal number of bin that feature values will bucket in
   * \param random_seed The seed for random generator
   * \param is_enable_sparse True for sparse feature
-  * \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
+  * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
   */
   Dataset(const char* data_filename, const char* init_score_filename,
     int max_bin, int random_seed, bool is_enable_sparse,
     const PredictFunction& predict_fun);

@@ -243,7 +243,7 @@ public:
   * \param max_bin The maximal number of bin that feature values will bucket in
   * \param random_seed The seed for random generator
   * \param is_enable_sparse True for sparse feature
-  * \param predict_fun Used for initial model, will give a prediction score based on this function, thenn set as initial score
+  * \param predict_fun Used for initial model, will give a prediction score based on this function, then set as initial score
   */
   Dataset(const char* data_filename,
     int max_bin, int random_seed, bool is_enable_sparse,
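The query_boundaries layout mentioned in this file's comments is easiest to see with a small example (the numbers below are made up, not taken from the repository): data is ordered by query, and query i owns the half-open row range [query_boundaries[i], query_boundaries[i+1]).

#include <cstdio>
#include <vector>

int main() {
  // 3 queries over 10 rows: the boundary array has num_queries + 1 entries.
  std::vector<int> query_boundaries = {0, 3, 7, 10};
  int num_queries = static_cast<int>(query_boundaries.size()) - 1;
  for (int q = 0; q < num_queries; ++q) {
    std::printf("query %d owns rows [%d, %d)\n",
                q, query_boundaries[q], query_boundaries[q + 1]);
  }
  return 0;
}
// prints: query 0 owns rows [0, 3), query 1 owns rows [3, 7), query 2 owns rows [7, 10)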
include/LightGBM/network.h

@@ -37,7 +37,7 @@ public:
 /*!
 * \brief node type on recursive halving algorithm
-* When number of machines is not power of 2, need group maiches into power of 2 group.
+* When number of machines is not power of 2, need group machines into power of 2 group.
 * And we can let each group has at most 2 machines.
 * if the group only has 1 machine. this machine is the normal node
 * if the grou has 2 machines, this group will have two type of nodes, one is the leader.
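One reading of this comment, sketched loosely (this is my interpretation of the grouping rule, not the actual Network code; GroupSizes is a made-up helper): keep g = 2^floor(log2(n)) groups and pair each of the n - g surplus machines with one of the first groups, so every group ends up with either 1 or 2 machines.

#include <vector>

// Hypothetical helper: size (1 or 2) of each of the 2^floor(log2(n)) groups.
std::vector<int> GroupSizes(int num_machines) {
  int num_groups = 1;
  while (num_groups * 2 <= num_machines) num_groups *= 2;  // largest power of two <= n
  std::vector<int> sizes(num_groups, 1);
  int surplus = num_machines - num_groups;
  for (int i = 0; i < surplus; ++i) sizes[i] = 2;  // these groups hold a leader plus one neighbor
  return sizes;
}
// Example: 6 machines -> 4 groups with sizes {2, 2, 1, 1}.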
src/application/application.cpp

@@ -80,7 +80,7 @@ void Application::LoadParameters(int argc, char** argv) {
     config_reader.ReadAllLines();
     if (config_reader.Lines().size() > 0) {
       for (auto& line : config_reader.Lines()) {
-        // remove str after #
+        // remove str after "#"
         if (line.size() > 0 && std::string::npos != line.find_first_of("#")) {
           line.erase(line.find_first_of("#"));
         }
src/boosting/gbdt.cpp

@@ -248,7 +248,6 @@ std::string GBDT::ModelsToString() const {
 void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
   // use serialized string to restore this object
-  // deseialize string to object????
   models_.clear();
   std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
   size_t i = 0;
src/boosting/gbdt.h

@@ -107,7 +107,7 @@ private:
   */
   void UpdateScore(const Tree* tree);
   /*!
-  * \brief Print Metric result of current iteration
+  * \brief Print metric result of current iteration
   * \param iter Current interation
   */
   void OutputMetric(int iter);

@@ -116,11 +116,11 @@ private:
   const Dataset* train_data_;
   /*! \brief Config of gbdt */
   const GBDTConfig* gbdt_config_;
-  /*! \brief Tree learner, will use tihs class to learn trees */
+  /*! \brief Tree learner, will use this class to learn trees */
   TreeLearner* tree_learner_;
   /*! \brief Objective function */
   const ObjectiveFunction* object_function_;
-  /*! \brief Store and update traning data's score */
+  /*! \brief Store and update training data's score */
   ScoreUpdater* train_score_updater_;
   /*! \brief Metrics for training data */
   std::vector<const Metric*> training_metrics_;
src/boosting/score_updater.hpp

@@ -57,8 +57,8 @@ public:
   * \brief Like AddScore(const Tree* tree), but only for part of data
   * Used for prediction of training out-of-bad data
   * \param tree Trained tree model
-  * \param data_indices Indices of data that want proccess to
-  * \param data_cnt Number of data that want proccess to
+  * \param data_indices Indices of data that will be proccessed
+  * \param data_cnt Number of data that will be proccessed
   */
   inline void AddScore(const Tree* tree, const data_size_t* data_indices, data_size_t data_cnt) {
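The idea behind this overload, sketched with a stand-in predictor (predict_one and AddScoreForSubset are hypothetical names, not the Tree/ScoreUpdater API): the new tree's output is added only to the scores of the listed rows, for example the out-of-bag rows, and every other score is left untouched.

#include <functional>
#include <vector>

void AddScoreForSubset(const std::function<double(int)>& predict_one,
                       const std::vector<int>& data_indices,
                       std::vector<double>* scores) {
  for (int idx : data_indices) {
    (*scores)[idx] += predict_one(idx);  // accumulate this tree's output for the chosen rows only
  }
}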
src/io/dataset.cpp

@@ -31,12 +31,12 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
     // create text parser
     parser_ = Parser::CreateParser(data_filename_, 0, nullptr);
     if (parser_ == nullptr) {
-      Log::Stderr("cannot recognise input data format, filename: %s", data_filename_);
+      Log::Stderr("cannot recognize input data format, filename: %s", data_filename_);
     }
     // create text reader
     text_reader_ = new TextReader<data_size_t>(data_filename);
   } else {
-    // only need to load initilize score, other meta data will load from bin flie
+    // only need to load initilize score, other meta data will be loaded from bin flie
     metadata_.Init(init_score_filename);
     Log::Stdout("will load data set from binary file");
     parser_ = nullptr;

@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
   size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_metadata > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_metadata;

@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
     Log::Stderr("binary file format error at feature %d's size", i);
   }
   size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
-  // re-allocmate space if not enough
+  // re-allocate space if not enough
   if (size_of_feature > buffer_size) {
     delete[] buffer;
     buffer_size = size_of_feature;
src/io/dense_bin.hpp

@@ -10,7 +10,7 @@
 namespace LightGBM {
 /*!
-* \brief Used to Store bins for dense feature
+* \brief Used to store bins for dense feature
 * Use template to reduce memory cost
 */
 template <typename VAL_T>
src/io/ordered_sparse_bin.hpp

@@ -13,7 +13,7 @@
 namespace LightGBM {
 /*!
-* \brief Ordered bin for sparse feature . efficient for construct histogram, especally for sparse bin
+* \brief Ordered bin for sparse feature . Efficient for construct histogram, especally for sparse bin
 * There are 2 advantages for using ordered bin.
 * 1. group the data by leaf, improve the cache hit.
 * 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature.
src/metric/binary_metric.hpp

@@ -225,7 +225,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
src/metric/dcg_calculator.cpp

@@ -21,7 +21,7 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) {
   label_gain_ = input_label_gain;
   discount_.clear();
   for (data_size_t i = 0; i < kMaxPosition; ++i) {
-    discount_.emplace_back(1.0 / std::log(2.0 + i));
+    discount_.emplace_back(1.0 / std::log2(2.0 + i));
   }
   is_inited_ = true;
 }
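This is the one behavioral change in the commit: the position discount switches from 1/ln(i+2) to 1/log2(i+2), the usual DCG definition for zero-based position i. Since 1/ln(x) = (1/ln 2) * 1/log2(x), the old code scaled every discount by the same constant, so reported DCG values change while NDCG ratios (DCG divided by the ideal DCG) are unaffected. A simplified sketch of how such a cached discount table is used (illustration only, not the actual DCGCalculator code):

#include <cmath>
#include <vector>

// DCG of a list whose gains are already in ranked order, using a
// precomputed discount table discount[i] = 1.0 / std::log2(2.0 + i).
double ComputeDCG(const std::vector<double>& gains_in_ranked_order,
                  const std::vector<double>& discount) {
  double dcg = 0.0;
  for (size_t i = 0; i < gains_in_ranked_order.size() && i < discount.size(); ++i) {
    dcg += gains_in_ranked_order[i] * discount[i];  // gain weighted by position discount
  }
  return dcg;
}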
src/metric/regression_metric.hpp

@@ -65,7 +65,7 @@ public:
   }
 private:
-  /*! \brief Output frequently */
+  /*! \brief Output frequency */
   int output_freq_;
   /*! \brief Number of data */
   data_size_t num_data_;
src/network/network.cpp

@@ -9,7 +9,7 @@
 namespace LightGBM {
-// static member defination
+// static member definition
 int Network::num_machines_;
 int Network::rank_;
 Linkers* Network::linkers_;

@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
     // send local data to neighbor first
     linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
   } else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
-    // recieve neighbor data first
+    // receive neighbor data first
     int need_recv_cnt = input_size;
     linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
     // reduce
src/objective/rank_objective.hpp

@@ -50,7 +50,7 @@ public:
       Log::Stderr("For NDCG metric, should have query information");
     }
     num_queries_ = metadata.num_queries();
-    // cache inverse max DCG, avoid compution many times
+    // cache inverse max DCG, avoid computation many times
     inverse_max_dcgs_ = new score_t[num_queries_];
     for (data_size_t i = 0; i < num_queries_; ++i) {
       inverse_max_dcgs_[i] = static_cast<score_t>(
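Why the inverse of the maximum DCG is cached per query: a query's ideal DCG never changes during training, so computing its reciprocal once turns every later NDCG evaluation into a single multiplication (ndcg = dcg * inverse_max_dcg). A simplified sketch under that assumption (InverseMaxDCG is a made-up helper, not the repository's code):

#include <algorithm>
#include <functional>
#include <vector>

double InverseMaxDCG(std::vector<double> gains, const std::vector<double>& discount) {
  std::sort(gains.begin(), gains.end(), std::greater<double>());  // ideal (descending) order
  double max_dcg = 0.0;
  for (size_t i = 0; i < gains.size() && i < discount.size(); ++i) {
    max_dcg += gains[i] * discount[i];
  }
  return max_dcg > 0.0 ? 1.0 / max_dcg : 0.0;  // guard queries with no positive gain
}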
src/treelearner/feature_histogram.hpp

@@ -40,7 +40,7 @@ public:
   * \brief Construct a histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param ordered_gradients Orederd gradients
   * \param ordered_hessians Ordered hessians
   * \param data_indices data indices of current leaf

@@ -59,7 +59,7 @@ public:
   * \param leaf current leaf
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   * \param gradients
   * \param hessian
   */

@@ -76,7 +76,7 @@ public:
   * \brief Set sumup information for current histogram
   * \param num_data number of data in current leaf
   * \param sum_gradients sum of gradients of current leaf
-  * \param sum_hessians sum of hissians of current leaf
+  * \param sum_hessians sum of hessians of current leaf
   */
   void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
     num_data_ = num_data;
src/treelearner/leaf_splits.hpp

@@ -26,7 +26,7 @@ public:
   }
   /*!
-  * \brief Init splits on current leaf, don't need to travesal all data
+  * \brief Init splits on current leaf, don't need to traverse all data
   * \param leaf Index of current leaf
   * \param data_partition current data partition
   * \param sum_gradients

@@ -43,7 +43,7 @@ public:
   }
   /*!
-  * \brief Init splits on current leaf, need to travesal all data to sum up
+  * \brief Init splits on current leaf, need to traverse all data to sum up
   * \param gradients
   * \param hessians
   */
src/treelearner/parallel_tree_learner.h

@@ -77,9 +77,9 @@ private:
   int* block_start_;
   /*! \brief Block size for reduce scatter */
   int* block_len_;
-  /*! \brief Write positions for feature histgrams */
+  /*! \brief Write positions for feature histograms */
   int* buffer_write_start_pos_;
-  /*! \brief Read positions for local feature histgrams */
+  /*! \brief Read positions for local feature histograms */
   int* buffer_read_start_pos_;
   /*! \brief Size for reduce scatter */
   int reduce_scatter_size_;