Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
d3657628
Commit
d3657628
authored
Oct 25, 2016
by
Qiwei Ye
Committed by
GitHub
Oct 25, 2016
Browse files
Merge pull request #36 from xuehui1991/update_for_dcg
update for typo.
parents
a6a75fe9
bb6971b4
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
25 additions
and
24 deletions
+25
-24
src/boosting/gbdt.h
src/boosting/gbdt.h
+3
-3
src/boosting/score_updater.hpp
src/boosting/score_updater.hpp
+2
-2
src/io/dataset.cpp
src/io/dataset.cpp
+3
-3
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+1
-1
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+1
-1
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+1
-1
src/network/network.cpp
src/network/network.cpp
+2
-2
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+1
-1
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+3
-3
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+2
-1
src/treelearner/parallel_tree_learner.h
src/treelearner/parallel_tree_learner.h
+2
-2
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+3
-3
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+1
-1
No files found.
src/boosting/gbdt.h
View file @
d3657628
...
...
@@ -115,7 +115,7 @@ private:
*/
void
UpdateScore
(
const
Tree
*
tree
);
/*!
* \brief Print
M
etric result of current iteration
* \brief Print
m
etric result of current iteration
* \param iter Current iteration
*/
bool
OutputMetric
(
int
iter
);
...
...
@@ -126,11 +126,11 @@ private:
const
Dataset
*
train_data_
;
/*! \brief Config of gbdt */
const
GBDTConfig
*
gbdt_config_
;
/*! \brief Tree learner, will use t
i
hs class to learn trees */
/*! \brief Tree learner, will use th
i
s class to learn trees */
TreeLearner
*
tree_learner_
;
/*! \brief Objective function */
const
ObjectiveFunction
*
object_function_
;
/*! \brief Store and update traning data's score */
/*! \brief Store and update tra
i
ning data's score */
ScoreUpdater
*
train_score_updater_
;
/*! \brief Metrics for training data */
std
::
vector
<
const
Metric
*>
training_metrics_
;
...
...
src/boosting/score_updater.hpp
View file @
d3657628
...
...
@@ -57,8 +57,8 @@ public:
* \brief Using tree model to get prediction number, then adding to scores for parts of data
* Used for prediction of training out-of-bag data
* \param tree Trained tree model
* \param data_indices Indices of data that w
ant
proccess
to
* \param data_cnt Number of data that w
ant
proccess
to
* \param data_indices Indices of data that w
ill be
proccess
ed
* \param data_cnt Number of data that w
ill be
proccess
ed
*/
inline
void
AddScore
(
const
Tree
*
tree
,
const
data_size_t
*
data_indices
,
data_size_t
data_cnt
)
{
...
...
src/io/dataset.cpp
View file @
d3657628
...
...
@@ -36,7 +36,7 @@ Dataset::Dataset(const char* data_filename, const char* init_score_filename,
// create text reader
text_reader_
=
new
TextReader
<
data_size_t
>
(
data_filename
);
}
else
{
// only need to load initilize score, other meta data will load from bin flie
// only need to load initilize score, other meta data will
be
load
ed
from bin flie
metadata_
.
Init
(
init_score_filename
);
Log
::
Info
(
"Loading data set from binary file"
);
parser_
=
nullptr
;
...
...
@@ -613,7 +613,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
size_t
size_of_metadata
=
*
(
reinterpret_cast
<
size_t
*>
(
buffer
));
// re-alloc
m
ate space if not enough
// re-allocate space if not enough
if
(
size_of_metadata
>
buffer_size
)
{
delete
[]
buffer
;
buffer_size
=
size_of_metadata
;
...
...
@@ -673,7 +673,7 @@ void Dataset::LoadDataFromBinFile(int rank, int num_machines, bool is_pre_partit
Log
::
Fatal
(
"Binary file format error at feature %d's size"
,
i
);
}
size_t
size_of_feature
=
*
(
reinterpret_cast
<
size_t
*>
(
buffer
));
// re-alloc
m
ate space if not enough
// re-allocate space if not enough
if
(
size_of_feature
>
buffer_size
)
{
delete
[]
buffer
;
buffer_size
=
size_of_feature
;
...
...
src/io/dense_bin.hpp
View file @
d3657628
...
...
@@ -10,7 +10,7 @@
namespace
LightGBM
{
/*!
* \brief Used to
S
tore bins for dense feature
* \brief Used to
s
tore bins for dense feature
* Use template to reduce memory cost
*/
template
<
typename
VAL_T
>
...
...
src/metric/binary_metric.hpp
View file @
d3657628
...
...
@@ -238,7 +238,7 @@ public:
}
private:
/*! \brief Output frequen
tl
y */
/*! \brief Output frequen
c
y */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
...
...
src/metric/regression_metric.hpp
View file @
d3657628
...
...
@@ -72,7 +72,7 @@ public:
}
private:
/*! \brief Output frequen
tl
y */
/*! \brief Output frequen
c
y */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
...
...
src/network/network.cpp
View file @
d3657628
...
...
@@ -9,7 +9,7 @@
namespace
LightGBM
{
// static member defin
a
tion
// static member defin
i
tion
int
Network
::
num_machines_
;
int
Network
::
rank_
;
Linkers
*
Network
::
linkers_
;
...
...
@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
// send local data to neighbor first
linkers_
->
Send
(
recursive_halving_map_
.
neighbor
,
input
,
input_size
);
}
else
if
(
recursive_halving_map_
.
type
==
RecursiveHalvingNodeType
::
GroupLeader
)
{
// rec
i
eve neighbor data first
// rece
i
ve neighbor data first
int
need_recv_cnt
=
input_size
;
linkers_
->
Recv
(
recursive_halving_map_
.
neighbor
,
output
,
need_recv_cnt
);
// reduce
...
...
src/objective/rank_objective.hpp
View file @
d3657628
...
...
@@ -50,7 +50,7 @@ public:
Log
::
Fatal
(
"For NDCG metric, should have query information"
);
}
num_queries_
=
metadata
.
num_queries
();
// cache inverse max DCG, avoid compution many times
// cache inverse max DCG, avoid comput
at
ion many times
inverse_max_dcgs_
=
new
score_t
[
num_queries_
];
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
inverse_max_dcgs_
[
i
]
=
static_cast
<
score_t
>
(
...
...
src/treelearner/feature_histogram.hpp
View file @
d3657628
...
...
@@ -40,7 +40,7 @@ public:
* \brief Construct a histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of h
i
ssians of current leaf
* \param sum_hessians sum of h
e
ssians of current leaf
* \param ordered_gradients Ordered gradients
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
...
...
@@ -59,7 +59,7 @@ public:
* \param leaf current leaf
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of h
i
ssians of current leaf
* \param sum_hessians sum of h
e
ssians of current leaf
* \param gradients
* \param hessian
*/
...
...
@@ -76,7 +76,7 @@ public:
* \brief Set sumup information for current histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of h
i
ssians of current leaf
* \param sum_hessians sum of h
e
ssians of current leaf
*/
void
SetSumup
(
data_size_t
num_data
,
score_t
sum_gradients
,
score_t
sum_hessians
)
{
num_data_
=
num_data
;
...
...
src/treelearner/leaf_splits.hpp
View file @
d3657628
...
...
@@ -26,6 +26,7 @@ public:
}
/*!
* \brief Init split on current leaf on partial data.
* \param leaf Index of current leaf
* \param data_partition current data partition
...
...
@@ -43,7 +44,7 @@ public:
}
/*!
* \brief Init splits on current leaf, it will travese all data to sum up the results
* \brief Init splits on current leaf, it will trave
r
se all data to sum up the results
* \param gradients
* \param hessians
*/
...
...
src/treelearner/parallel_tree_learner.h
View file @
d3657628
...
...
@@ -77,9 +77,9 @@ private:
int
*
block_start_
;
/*! \brief Block size for reduce scatter */
int
*
block_len_
;
/*! \brief Write positions for feature histgrams */
/*! \brief Write positions for feature hist
o
grams */
int
*
buffer_write_start_pos_
;
/*! \brief Read positions for local feature histgrams */
/*! \brief Read positions for local feature hist
o
grams */
int
*
buffer_read_start_pos_
;
/*! \brief Size for reduce scatter */
int
reduce_scatter_size_
;
...
...
src/treelearner/serial_tree_learner.cpp
View file @
d3657628
...
...
@@ -107,7 +107,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
// initialize ordered gradients and hessians
ordered_gradients_
=
new
score_t
[
num_data_
];
ordered_hessians_
=
new
score_t
[
num_data_
];
// if has ordered bin, need allocat
a
a buffer to fast split
// if has ordered bin, need allocat
e
a buffer to fast split
if
(
has_ordered_bin_
)
{
is_data_in_leaf_
=
new
char
[
num_data_
];
}
...
...
@@ -269,14 +269,14 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
}
else
if
(
num_data_in_left_child
<
num_data_in_right_child
)
{
smaller_leaf
=
left_leaf
;
larger_leaf
=
right_leaf
;
// put parent(left) leaf's histograms into larger leaf's histgrams
// put parent(left) leaf's histograms into larger leaf's hist
o
grams
if
(
histogram_pool_
.
Get
(
left_leaf
,
&
larger_leaf_histogram_array_
))
{
parent_leaf_histogram_array_
=
larger_leaf_histogram_array_
;
}
histogram_pool_
.
Move
(
left_leaf
,
right_leaf
);
histogram_pool_
.
Get
(
left_leaf
,
&
smaller_leaf_histogram_array_
);
}
else
{
smaller_leaf
=
right_leaf
;
larger_leaf
=
left_leaf
;
// put parent(left) leaf's histograms to larger leaf's histgrams
// put parent(left) leaf's histograms to larger leaf's hist
o
grams
if
(
histogram_pool_
.
Get
(
left_leaf
,
&
larger_leaf_histogram_array_
))
{
parent_leaf_histogram_array_
=
larger_leaf_histogram_array_
;
}
histogram_pool_
.
Get
(
right_leaf
,
&
smaller_leaf_histogram_array_
);
}
...
...
src/treelearner/serial_tree_learner.h
View file @
d3657628
...
...
@@ -121,7 +121,7 @@ protected:
DataPartition
*
data_partition_
;
/*! \brief used for generate used features */
Random
random_
;
/*! \brief used for sub feature training, is_feature_used_[i] = fal
a
se means don't used feature i */
/*! \brief used for sub feature training, is_feature_used_[i] = false means don't used feature i */
bool
*
is_feature_used_
;
/*! \brief pointer to histograms array of parent of current leaves */
FeatureHistogram
*
parent_leaf_histogram_array_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment