Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
e404d7cf
Commit
e404d7cf
authored
Mar 30, 2017
by
Guolin Ke
Browse files
only support boost_from_average in regression.
parent
f40e0d2e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
25 additions
and
33 deletions
+25
-33
docs/Parameters.md
docs/Parameters.md
+1
-1
include/LightGBM/config.h
include/LightGBM/config.h
+1
-0
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+19
-26
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+2
-4
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+1
-1
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+1
-1
No files found.
docs/Parameters.md
View file @
e404d7cf
...
@@ -184,7 +184,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
...
@@ -184,7 +184,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
*
```scale_pos_weight```
, default=
```1.0```
, type=double
*
```scale_pos_weight```
, default=
```1.0```
, type=double
*
weight of positive class in binary classification task
*
weight of positive class in binary classification task
*
```boost_from_average```
, default=
```true```
, type=bool
*
```boost_from_average```
, default=
```true```
, type=bool
*
adjust initial score of each observation to the mean of each class for faster convergence
*
adjust initial score to the mean of the labels for faster convergence; only used in regression tasks.
*
```is_unbalance```
, default=
```false```
, type=bool
*
```is_unbalance```
, default=
```false```
, type=bool
*
used in binary classification. Set this to
```true```
if the training data are unbalanced.
*
used in binary classification. Set this to
```true```
if the training data are unbalanced.
*
```max_position```
, default=
```20```
, type=int
*
```max_position```
, default=
```20```
, type=int
...
...
include/LightGBM/config.h
View file @
e404d7cf
...
@@ -213,6 +213,7 @@ public:
...
@@ -213,6 +213,7 @@ public:
int
drop_seed
=
4
;
int
drop_seed
=
4
;
double
top_rate
=
0.2
f
;
double
top_rate
=
0.2
f
;
double
other_rate
=
0.1
f
;
double
other_rate
=
0.1
f
;
// Only used for regression: boosting starts from the average of the labels.
bool
boost_from_average
=
true
;
bool
boost_from_average
=
true
;
std
::
string
tree_learner_type
=
"serial"
;
std
::
string
tree_learner_type
=
"serial"
;
TreeConfig
tree_config
;
TreeConfig
tree_config
;
...
...
src/boosting/gbdt.cpp
View file @
e404d7cf
...
@@ -299,32 +299,26 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
...
@@ -299,32 +299,26 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
}
}
bool
GBDT
::
TrainOneIter
(
const
score_t
*
gradient
,
const
score_t
*
hessian
,
bool
is_eval
)
{
bool
GBDT
::
TrainOneIter
(
const
score_t
*
gradient
,
const
score_t
*
hessian
,
bool
is_eval
)
{
// boosting from average prediction. It doesn't work well for
binary
classification, remove it for now.
// Boost from the average label. This doesn't work well for classification, so it is only applied to regression for now.
if
(
models_
.
empty
()
if
(
models_
.
empty
()
&&
gbdt_config_
->
boost_from_average
&&
gbdt_config_
->
boost_from_average
&&
!
train_score_updater_
->
has_init_score
()
&&
!
train_score_updater_
->
has_init_score
()
&&
sigmoid_
<
0.0
f
)
{
&&
sigmoid_
<
0.0
f
std
::
vector
<
double
>
sum_per_class
(
num_class_
,
0.0
f
);
&&
num_class_
<=
1
)
{
double
init_score
=
0.0
f
;
auto
label
=
train_data_
->
metadata
().
label
();
auto
label
=
train_data_
->
metadata
().
label
();
if
(
num_class_
>
1
)
{
#pragma omp parallel for schedule(static) reduction(+:init_score)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sum_per_class
[
static_cast
<
int
>
(
label
[
i
])]
+=
1.0
f
;
init_score
+=
label
[
i
];
}
}
}
else
{
init_score
/=
num_data_
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
sum_per_class
[
0
]
+=
label
[
i
];
new_tree
->
Split
(
0
,
0
,
BinType
::
NumericalBin
,
0
,
0
,
0
,
init_score
,
init_score
,
0
,
num_data_
,
1
);
}
train_score_updater_
->
AddScore
(
init_score
,
0
);
}
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
for
(
int
curr_class
=
0
;
curr_class
<
num_class_
;
++
curr_class
)
{
score_updater
->
AddScore
(
init_score
,
0
);
double
init_score
=
sum_per_class
[
curr_class
]
/
num_data_
;
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
new_tree
->
Split
(
0
,
0
,
BinType
::
NumericalBin
,
0
,
0
,
0
,
init_score
,
init_score
,
0
,
num_data_
,
1
);
train_score_updater_
->
AddScore
(
init_score
,
curr_class
);
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
score_updater
->
AddScore
(
init_score
,
curr_class
);
}
models_
.
push_back
(
std
::
move
(
new_tree
));
}
}
models_
.
push_back
(
std
::
move
(
new_tree
));
boost_from_average_
=
true
;
boost_from_average_
=
true
;
}
}
// boosting first
// boosting first
...
@@ -359,7 +353,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -359,7 +353,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// get sub gradients
// get sub gradients
for
(
int
curr_class
=
0
;
curr_class
<
num_class_
;
++
curr_class
)
{
for
(
int
curr_class
=
0
;
curr_class
<
num_class_
;
++
curr_class
)
{
auto
bias
=
curr_class
*
num_data_
;
auto
bias
=
curr_class
*
num_data_
;
// cannot multi-threading
// cannot use multi-threading here.
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
gradients_
[
bias
+
i
]
=
gradient
[
bias
+
bag_data_indices_
[
i
]];
gradients_
[
bias
+
i
]
=
gradient
[
bias
+
bag_data_indices_
[
i
]];
hessians_
[
bias
+
i
]
=
hessian
[
bias
+
bag_data_indices_
[
i
]];
hessians_
[
bias
+
i
]
=
hessian
[
bias
+
bag_data_indices_
[
i
]];
...
@@ -376,9 +370,8 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -376,9 +370,8 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
#ifdef TIMETAG
#ifdef TIMETAG
start_time
=
std
::
chrono
::
steady_clock
::
now
();
start_time
=
std
::
chrono
::
steady_clock
::
now
();
#endif
#endif
std
::
unique_ptr
<
Tree
>
new_tree
;
std
::
unique_ptr
<
Tree
>
new_tree
(
// train a new tree
tree_learner_
->
Train
(
gradient
+
curr_class
*
num_data_
,
hessian
+
curr_class
*
num_data_
));
new_tree
.
reset
(
tree_learner_
->
Train
(
gradient
+
curr_class
*
num_data_
,
hessian
+
curr_class
*
num_data_
));
#ifdef TIMETAG
#ifdef TIMETAG
tree_time
+=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
tree_time
+=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
#endif
#endif
...
@@ -390,7 +383,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -390,7 +383,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// update score
// update score
UpdateScore
(
new_tree
.
get
(),
curr_class
);
UpdateScore
(
new_tree
.
get
(),
curr_class
);
UpdateScoreOutOfBag
(
new_tree
.
get
(),
curr_class
);
UpdateScoreOutOfBag
(
new_tree
.
get
(),
curr_class
);
}
}
// add model
// add model
models_
.
push_back
(
std
::
move
(
new_tree
));
models_
.
push_back
(
std
::
move
(
new_tree
));
}
}
...
...
src/treelearner/data_partition.hpp
View file @
e404d7cf
...
@@ -55,10 +55,8 @@ public:
...
@@ -55,10 +55,8 @@ public:
* \brief Init, will put all data on the root(leaf_idx = 0)
* \brief Init, will put all data on the root(leaf_idx = 0)
*/
*/
void
Init
()
{
void
Init
()
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
std
::
fill
(
leaf_begin_
.
begin
(),
leaf_begin_
.
end
(),
0
);
leaf_count_
[
i
]
=
0
;
std
::
fill
(
leaf_count_
.
begin
(),
leaf_count_
.
end
(),
0
);
}
leaf_begin_
[
0
]
=
0
;
if
(
used_data_indices_
==
nullptr
)
{
if
(
used_data_indices_
==
nullptr
)
{
// if using all data
// if using all data
leaf_count_
[
0
]
=
num_data_
;
leaf_count_
[
0
]
=
num_data_
;
...
...
src/treelearner/serial_tree_learner.cpp
View file @
e404d7cf
...
@@ -226,7 +226,7 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
...
@@ -226,7 +226,7 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
CHECK
(
data_partition_
->
num_leaves
()
>=
tree
->
num_leaves
());
CHECK
(
data_partition_
->
num_leaves
()
>=
tree
->
num_leaves
());
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
data_size_t
cnt_leaf_data
=
0
;
data_size_t
cnt_leaf_data
=
0
;
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
cnt_leaf_data
);
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
cnt_leaf_data
);
...
...
src/treelearner/serial_tree_learner.h
View file @
e404d7cf
...
@@ -48,7 +48,7 @@ public:
...
@@ -48,7 +48,7 @@ public:
if
(
tree
->
num_leaves
()
<=
1
)
{
return
;
}
if
(
tree
->
num_leaves
()
<=
1
)
{
return
;
}
CHECK
(
tree
->
num_leaves
()
<=
data_partition_
->
num_leaves
());
CHECK
(
tree
->
num_leaves
()
<=
data_partition_
->
num_leaves
());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
double
output
=
static_cast
<
double
>
(
tree
->
LeafOutput
(
i
));
double
output
=
static_cast
<
double
>
(
tree
->
LeafOutput
(
i
));
data_size_t
cnt_leaf_data
=
0
;
data_size_t
cnt_leaf_data
=
0
;
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
cnt_leaf_data
);
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
cnt_leaf_data
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment