Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
888e2b18
Commit
888e2b18
authored
Oct 18, 2016
by
Guolin Ke
Committed by
GitHub
Oct 18, 2016
Browse files
Merge pull request #13 from xuehui1991/update_for_typo
update for typo
parents
aee30126
0dcd422a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
9 deletions
+10
-9
include/LightGBM/application.h
include/LightGBM/application.h
+1
-1
include/LightGBM/bin.h
include/LightGBM/bin.h
+2
-2
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+7
-6
No files found.
include/LightGBM/application.h
View file @
888e2b18
...
@@ -18,7 +18,7 @@ class Metric;
...
@@ -18,7 +18,7 @@ class Metric;
* \brief The entrance of LightGBM. this application has two tasks:
* \brief The entrance of LightGBM. this application has two tasks:
* Train and Predict.
* Train and Predict.
* Train task will train a new model
* Train task will train a new model
* Predict task will predict
ing
the scores of test data
then
sav
ing
the score to local disk
* Predict task will predict the scores of test data
and
sav
e
the score to local disk
*/
*/
class
Application
{
class
Application
{
public:
public:
...
...
include/LightGBM/bin.h
View file @
888e2b18
...
@@ -119,10 +119,10 @@ private:
...
@@ -119,10 +119,10 @@ private:
};
};
/*!
/*!
* \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin
* \brief Interface for ordered bin data.
It is very
efficient for construct histogram, espec
i
ally for sparse bin
* There are 2 advantages for using ordered bin.
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
* 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram
c
consturction for sparse feature.
* 2. only store the non-zero bin, which can speed up the histogram construction for sparse feature.
* But it has an additional cost: it needs to re-order the bins after leaf split, which will cost much for dense feature.
* But it has an additional cost: it needs to re-order the bins after leaf split, which will cost much for dense feature.
* So we only use ordered bin for sparse features now.
* So we only use ordered bin for sparse features now.
*/
*/
...
...
src/boosting/gbdt.cpp
View file @
888e2b18
...
@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
...
@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
}
}
void
GBDT
::
UpdateScoreOutOfBag
(
const
Tree
*
tree
)
{
void
GBDT
::
UpdateScoreOutOfBag
(
const
Tree
*
tree
)
{
// we need to predict out-of-bag
data's socres
for boosing
// we need to predict out-of-bag
scores of data
for boos
t
ing
if
(
out_of_bag_data_indices_
!=
nullptr
)
{
if
(
out_of_bag_data_indices_
!=
nullptr
)
{
train_score_updater_
->
train_score_updater_
->
AddScore
(
tree
,
out_of_bag_data_indices_
,
out_of_bag_data_cnt_
);
AddScore
(
tree
,
out_of_bag_data_indices_
,
out_of_bag_data_cnt_
);
...
@@ -169,12 +169,12 @@ void GBDT::Train() {
...
@@ -169,12 +169,12 @@ void GBDT::Train() {
Bagging
(
iter
);
Bagging
(
iter
);
// train a new tree
// train a new tree
Tree
*
new_tree
=
TrainOneTree
();
Tree
*
new_tree
=
TrainOneTree
();
// if canno
n
learn a new tree, stop
// if canno
t
learn a new tree,
then
stop
if
(
new_tree
->
num_leaves
()
<=
1
)
{
if
(
new_tree
->
num_leaves
()
<=
1
)
{
Log
::
Stdout
(
"Cannot do any boosting for tree cannot split"
);
Log
::
Stdout
(
"Cannot do any boosting for tree cannot split"
);
break
;
break
;
}
}
//
S
hrinkage by learning rate
//
s
hrinkage by learning rate
new_tree
->
Shrinkage
(
gbdt_config_
->
learning_rate
);
new_tree
->
Shrinkage
(
gbdt_config_
->
learning_rate
);
// update score
// update score
UpdateScore
(
new_tree
);
UpdateScore
(
new_tree
);
...
@@ -183,12 +183,12 @@ void GBDT::Train() {
...
@@ -183,12 +183,12 @@ void GBDT::Train() {
OutputMetric
(
iter
+
1
);
OutputMetric
(
iter
+
1
);
// add model
// add model
models_
.
push_back
(
new_tree
);
models_
.
push_back
(
new_tree
);
//
writ
e model to file
on ev
er
y
iteration
//
sav
e model to file
p
er iteration
fprintf
(
output_model_file
,
"Tree=%d
\n
"
,
iter
);
fprintf
(
output_model_file
,
"Tree=%d
\n
"
,
iter
);
fprintf
(
output_model_file
,
"%s
\n
"
,
new_tree
->
ToString
().
c_str
());
fprintf
(
output_model_file
,
"%s
\n
"
,
new_tree
->
ToString
().
c_str
());
fflush
(
output_model_file
);
fflush
(
output_model_file
);
auto
end_time
=
std
::
chrono
::
high_resolution_clock
::
now
();
auto
end_time
=
std
::
chrono
::
high_resolution_clock
::
now
();
// output used time
on each
iteration
// output used time
per
iteration
Log
::
Stdout
(
"%f seconds elapsed, finished %d iteration"
,
std
::
chrono
::
duration
<
double
,
Log
::
Stdout
(
"%f seconds elapsed, finished %d iteration"
,
std
::
chrono
::
duration
<
double
,
std
::
milli
>
(
end_time
-
start_time
)
*
1e-3
,
iter
+
1
);
std
::
milli
>
(
end_time
-
start_time
)
*
1e-3
,
iter
+
1
);
}
}
...
@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
...
@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
}
}
void
GBDT
::
Boosting
()
{
void
GBDT
::
Boosting
()
{
// objective function will calculat
ion
gradients and hessians
// objective function will calculat
e
gradients and hessians
object_function_
->
object_function_
->
GetGradients
(
train_score_updater_
->
score
(),
gradients_
,
hessians_
);
GetGradients
(
train_score_updater_
->
score
(),
gradients_
,
hessians_
);
}
}
...
@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const {
...
@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const {
void
GBDT
::
ModelsFromString
(
const
std
::
string
&
model_str
,
int
num_used_model
)
{
void
GBDT
::
ModelsFromString
(
const
std
::
string
&
model_str
,
int
num_used_model
)
{
// use serialized string to restore this object
// use serialized string to restore this object
// deserialize string to object????
models_
.
clear
();
models_
.
clear
();
std
::
vector
<
std
::
string
>
lines
=
Common
::
Split
(
model_str
.
c_str
(),
'\n'
);
std
::
vector
<
std
::
string
>
lines
=
Common
::
Split
(
model_str
.
c_str
(),
'\n'
);
size_t
i
=
0
;
size_t
i
=
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment