Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
90127b52
Commit
90127b52
authored
Feb 02, 2019
by
Nikita Titov
Committed by
Guolin Ke
Feb 02, 2019
Browse files
cpplint whitespaces and new lines (#1986)
parent
6f548ada
Changes
79
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
108 additions
and
137 deletions
+108
-137
src/network/socket_wrapper.hpp
src/network/socket_wrapper.hpp
+3
-4
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+5
-5
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+2
-4
src/objective/objective_function.cpp
src/objective/objective_function.cpp
+1
-1
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+0
-2
src/objective/regression_objective.hpp
src/objective/regression_objective.hpp
+6
-13
src/objective/xentropy_objective.hpp
src/objective/xentropy_objective.hpp
+3
-5
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/data_parallel_tree_learner.cpp
+0
-1
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+0
-1
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+8
-14
src/treelearner/feature_parallel_tree_learner.cpp
src/treelearner/feature_parallel_tree_learner.cpp
+0
-1
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+54
-57
src/treelearner/gpu_tree_learner.h
src/treelearner/gpu_tree_learner.h
+11
-10
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+1
-1
src/treelearner/parallel_tree_learner.h
src/treelearner/parallel_tree_learner.h
+3
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+9
-11
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+1
-2
src/treelearner/split_info.hpp
src/treelearner/split_info.hpp
+0
-2
src/treelearner/voting_parallel_tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
+1
-2
No files found.
src/network/socket_wrapper.hpp
View file @
90127b52
...
@@ -51,8 +51,7 @@ const int INVALID_SOCKET = -1;
...
@@ -51,8 +51,7 @@ const int INVALID_SOCKET = -1;
#ifdef _WIN32
#ifdef _WIN32
#ifndef _MSC_VER
#ifndef _MSC_VER
// not using visual studio in windows
// not using visual studio in windows
inline
int
inet_pton
(
int
af
,
const
char
*
src
,
void
*
dst
)
inline
int
inet_pton
(
int
af
,
const
char
*
src
,
void
*
dst
)
{
{
struct
sockaddr_storage
ss
;
struct
sockaddr_storage
ss
;
int
size
=
sizeof
(
ss
);
int
size
=
sizeof
(
ss
);
char
src_copy
[
INET6_ADDRSTRLEN
+
1
];
char
src_copy
[
INET6_ADDRSTRLEN
+
1
];
...
@@ -119,11 +118,11 @@ public:
...
@@ -119,11 +118,11 @@ public:
if
(
sockfd_
==
INVALID_SOCKET
)
{
if
(
sockfd_
==
INVALID_SOCKET
)
{
return
;
return
;
}
}
if
(
setsockopt
(
sockfd_
,
SOL_SOCKET
,
SO_RCVBUF
,
reinterpret_cast
<
const
char
*>
(
&
SocketConfig
::
kSocketBufferSize
),
sizeof
(
SocketConfig
::
kSocketBufferSize
))
!=
0
)
{
if
(
setsockopt
(
sockfd_
,
SOL_SOCKET
,
SO_RCVBUF
,
reinterpret_cast
<
const
char
*>
(
&
SocketConfig
::
kSocketBufferSize
),
sizeof
(
SocketConfig
::
kSocketBufferSize
))
!=
0
)
{
Log
::
Warning
(
"Set SO_RCVBUF failed, please increase your net.core.rmem_max to 100k at least"
);
Log
::
Warning
(
"Set SO_RCVBUF failed, please increase your net.core.rmem_max to 100k at least"
);
}
}
if
(
setsockopt
(
sockfd_
,
SOL_SOCKET
,
SO_SNDBUF
,
reinterpret_cast
<
const
char
*>
(
&
SocketConfig
::
kSocketBufferSize
),
sizeof
(
SocketConfig
::
kSocketBufferSize
))
!=
0
)
{
if
(
setsockopt
(
sockfd_
,
SOL_SOCKET
,
SO_SNDBUF
,
reinterpret_cast
<
const
char
*>
(
&
SocketConfig
::
kSocketBufferSize
),
sizeof
(
SocketConfig
::
kSocketBufferSize
))
!=
0
)
{
Log
::
Warning
(
"Set SO_SNDBUF failed, please increase your net.core.wmem_max to 100k at least"
);
Log
::
Warning
(
"Set SO_SNDBUF failed, please increase your net.core.wmem_max to 100k at least"
);
}
}
...
...
src/objective/binary_objective.hpp
View file @
90127b52
...
@@ -19,7 +19,7 @@ public:
...
@@ -19,7 +19,7 @@ public:
}
}
is_unbalance_
=
config
.
is_unbalance
;
is_unbalance_
=
config
.
is_unbalance
;
scale_pos_weight_
=
static_cast
<
double
>
(
config
.
scale_pos_weight
);
scale_pos_weight_
=
static_cast
<
double
>
(
config
.
scale_pos_weight
);
if
(
is_unbalance_
&&
std
::
fabs
(
scale_pos_weight_
-
1.0
f
)
>
1e-6
)
{
if
(
is_unbalance_
&&
std
::
fabs
(
scale_pos_weight_
-
1.0
f
)
>
1e-6
)
{
Log
::
Fatal
(
"Cannot set is_unbalance and scale_pos_weight at the same time"
);
Log
::
Fatal
(
"Cannot set is_unbalance and scale_pos_weight at the same time"
);
}
}
is_pos_
=
is_pos
;
is_pos_
=
is_pos
;
...
@@ -54,7 +54,7 @@ public:
...
@@ -54,7 +54,7 @@ public:
// REMOVEME: remove the warning after 2.4 version release
// REMOVEME: remove the warning after 2.4 version release
Log
::
Warning
(
"Starting from the 2.1.2 version, default value for "
Log
::
Warning
(
"Starting from the 2.1.2 version, default value for "
"the
\"
boost_from_average
\"
parameter in
\"
binary
\"
objective is true.
\n
"
"the
\"
boost_from_average
\"
parameter in
\"
binary
\"
objective is true.
\n
"
"This may cause significantly different results comparing to the previous versions of LightGBM.
\n
"
"This may cause significantly different results comparing to the previous versions of LightGBM.
\n
"
"Try to set boost_from_average=false, if your old models produce bad results"
);
"Try to set boost_from_average=false, if your old models produce bad results"
);
// count for positive and negative samples
// count for positive and negative samples
#pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
#pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
...
@@ -123,13 +123,13 @@ public:
...
@@ -123,13 +123,13 @@ public:
}
}
}
}
}
}
// implement custom average to boost from (if enabled among options)
// implement custom average to boost from (if enabled among options)
double
BoostFromScore
(
int
)
const
override
{
double
BoostFromScore
(
int
)
const
override
{
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
#pragma omp parallel for schedule(static) reduction(+:suml,
sumw)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
is_pos_
(
label_
[
i
])
*
weights_
[
i
];
suml
+=
is_pos_
(
label_
[
i
])
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
...
@@ -149,7 +149,7 @@ public:
...
@@ -149,7 +149,7 @@ public:
return
initscore
;
return
initscore
;
}
}
bool
ClassNeedTrain
(
int
/*class_id*/
)
const
override
{
bool
ClassNeedTrain
(
int
/*class_id*/
)
const
override
{
return
need_train_
;
return
need_train_
;
}
}
...
...
src/objective/multiclass_objective.hpp
View file @
90127b52
...
@@ -35,7 +35,6 @@ public:
...
@@ -35,7 +35,6 @@ public:
}
}
~
MulticlassSoftmax
()
{
~
MulticlassSoftmax
()
{
}
}
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
...
@@ -138,8 +137,8 @@ public:
...
@@ -138,8 +137,8 @@ public:
return
std
::
log
(
std
::
max
<
double
>
(
kEpsilon
,
class_init_probs_
[
class_id
]));
return
std
::
log
(
std
::
max
<
double
>
(
kEpsilon
,
class_init_probs_
[
class_id
]));
}
}
bool
ClassNeedTrain
(
int
class_id
)
const
override
{
bool
ClassNeedTrain
(
int
class_id
)
const
override
{
if
(
std
::
fabs
(
class_init_probs_
[
class_id
])
<=
kEpsilon
if
(
std
::
fabs
(
class_init_probs_
[
class_id
])
<=
kEpsilon
||
std
::
fabs
(
class_init_probs_
[
class_id
])
>=
1.0
-
kEpsilon
)
{
||
std
::
fabs
(
class_init_probs_
[
class_id
])
>=
1.0
-
kEpsilon
)
{
return
false
;
return
false
;
}
else
{
}
else
{
...
@@ -197,7 +196,6 @@ public:
...
@@ -197,7 +196,6 @@ public:
}
}
~
MulticlassOVA
()
{
~
MulticlassOVA
()
{
}
}
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
...
...
src/objective/objective_function.cpp
View file @
90127b52
...
@@ -9,7 +9,7 @@ namespace LightGBM {
...
@@ -9,7 +9,7 @@ namespace LightGBM {
ObjectiveFunction
*
ObjectiveFunction
::
CreateObjectiveFunction
(
const
std
::
string
&
type
,
const
Config
&
config
)
{
ObjectiveFunction
*
ObjectiveFunction
::
CreateObjectiveFunction
(
const
std
::
string
&
type
,
const
Config
&
config
)
{
if
(
type
==
std
::
string
(
"regression"
)
||
type
==
std
::
string
(
"regression_l2"
)
if
(
type
==
std
::
string
(
"regression"
)
||
type
==
std
::
string
(
"regression_l2"
)
||
type
==
std
::
string
(
"mean_squared_error"
)
||
type
==
std
::
string
(
"mse"
)
||
type
==
std
::
string
(
"mean_squared_error"
)
||
type
==
std
::
string
(
"mse"
)
||
type
==
std
::
string
(
"l2_root"
)
||
type
==
std
::
string
(
"root_mean_squared_error"
)
||
type
==
std
::
string
(
"rmse"
))
{
||
type
==
std
::
string
(
"l2_root"
)
||
type
==
std
::
string
(
"root_mean_squared_error"
)
||
type
==
std
::
string
(
"rmse"
))
{
return
new
RegressionL2loss
(
config
);
return
new
RegressionL2loss
(
config
);
}
else
if
(
type
==
std
::
string
(
"regression_l1"
)
||
type
==
std
::
string
(
"mean_absolute_error"
)
||
type
==
std
::
string
(
"mae"
))
{
}
else
if
(
type
==
std
::
string
(
"regression_l1"
)
||
type
==
std
::
string
(
"mean_absolute_error"
)
||
type
==
std
::
string
(
"mae"
))
{
...
...
src/objective/rank_objective.hpp
View file @
90127b52
...
@@ -34,11 +34,9 @@ public:
...
@@ -34,11 +34,9 @@ public:
}
}
explicit
LambdarankNDCG
(
const
std
::
vector
<
std
::
string
>&
)
{
explicit
LambdarankNDCG
(
const
std
::
vector
<
std
::
string
>&
)
{
}
}
~
LambdarankNDCG
()
{
~
LambdarankNDCG
()
{
}
}
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
num_data_
=
num_data
;
num_data_
=
num_data
;
...
...
src/objective/regression_objective.hpp
View file @
90127b52
...
@@ -78,7 +78,7 @@ public:
...
@@ -78,7 +78,7 @@ public:
}
}
}
}
}
}
~
RegressionL2loss
()
{
~
RegressionL2loss
()
{
}
}
...
@@ -146,7 +146,7 @@ public:
...
@@ -146,7 +146,7 @@ public:
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
#pragma omp parallel for schedule(static) reduction(+:suml,
sumw)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
]
*
weights_
[
i
];
suml
+=
label_
[
i
]
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
...
@@ -221,7 +221,7 @@ public:
...
@@ -221,7 +221,7 @@ public:
bool
IsRenewTreeOutput
()
const
override
{
return
true
;
}
bool
IsRenewTreeOutput
()
const
override
{
return
true
;
}
double
RenewTreeOutput
(
double
,
const
double
*
pred
,
double
RenewTreeOutput
(
double
,
const
double
*
pred
,
const
data_size_t
*
index_mapper
,
const
data_size_t
*
index_mapper
,
const
data_size_t
*
bagging_mapper
,
const
data_size_t
*
bagging_mapper
,
data_size_t
num_data_in_leaf
)
const
override
{
data_size_t
num_data_in_leaf
)
const
override
{
...
@@ -253,7 +253,7 @@ public:
...
@@ -253,7 +253,7 @@ public:
}
}
}
}
double
RenewTreeOutput
(
double
,
double
pred
,
double
RenewTreeOutput
(
double
,
double
pred
,
const
data_size_t
*
index_mapper
,
const
data_size_t
*
index_mapper
,
const
data_size_t
*
bagging_mapper
,
const
data_size_t
*
bagging_mapper
,
data_size_t
num_data_in_leaf
)
const
override
{
data_size_t
num_data_in_leaf
)
const
override
{
...
@@ -362,7 +362,6 @@ public:
...
@@ -362,7 +362,6 @@ public:
}
}
explicit
RegressionFairLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
explicit
RegressionFairLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
}
}
~
RegressionFairLoss
()
{}
~
RegressionFairLoss
()
{}
...
@@ -414,7 +413,6 @@ public:
...
@@ -414,7 +413,6 @@ public:
}
}
explicit
RegressionPoissonLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
explicit
RegressionPoissonLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
}
}
~
RegressionPoissonLoss
()
{}
~
RegressionPoissonLoss
()
{}
...
@@ -492,7 +490,6 @@ public:
...
@@ -492,7 +490,6 @@ public:
}
}
explicit
RegressionQuantileloss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
explicit
RegressionQuantileloss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL2loss
(
strs
)
{
}
}
~
RegressionQuantileloss
()
{}
~
RegressionQuantileloss
()
{}
...
@@ -620,7 +617,6 @@ public:
...
@@ -620,7 +617,6 @@ public:
}
}
explicit
RegressionMAPELOSS
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL1loss
(
strs
)
{
explicit
RegressionMAPELOSS
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionL1loss
(
strs
)
{
}
}
~
RegressionMAPELOSS
()
{}
~
RegressionMAPELOSS
()
{}
...
@@ -727,7 +723,6 @@ public:
...
@@ -727,7 +723,6 @@ public:
private:
private:
std
::
vector
<
label_t
>
label_weight_
;
std
::
vector
<
label_t
>
label_weight_
;
};
};
...
@@ -741,7 +736,6 @@ public:
...
@@ -741,7 +736,6 @@ public:
}
}
explicit
RegressionGammaLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionPoissonLoss
(
strs
)
{
explicit
RegressionGammaLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionPoissonLoss
(
strs
)
{
}
}
~
RegressionGammaLoss
()
{}
~
RegressionGammaLoss
()
{}
...
@@ -766,7 +760,6 @@ public:
...
@@ -766,7 +760,6 @@ public:
const
char
*
GetName
()
const
override
{
const
char
*
GetName
()
const
override
{
return
"gamma"
;
return
"gamma"
;
}
}
};
};
/*!
/*!
...
@@ -779,7 +772,6 @@ public:
...
@@ -779,7 +772,6 @@ public:
}
}
explicit
RegressionTweedieLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionPoissonLoss
(
strs
)
{
explicit
RegressionTweedieLoss
(
const
std
::
vector
<
std
::
string
>&
strs
)
:
RegressionPoissonLoss
(
strs
)
{
}
}
~
RegressionTweedieLoss
()
{}
~
RegressionTweedieLoss
()
{}
...
@@ -790,7 +782,7 @@ public:
...
@@ -790,7 +782,7 @@ public:
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
gradients
[
i
]
=
static_cast
<
score_t
>
(
-
label_
[
i
]
*
std
::
exp
((
1
-
rho_
)
*
score
[
i
])
+
std
::
exp
((
2
-
rho_
)
*
score
[
i
]));
gradients
[
i
]
=
static_cast
<
score_t
>
(
-
label_
[
i
]
*
std
::
exp
((
1
-
rho_
)
*
score
[
i
])
+
std
::
exp
((
2
-
rho_
)
*
score
[
i
]));
hessians
[
i
]
=
static_cast
<
score_t
>
(
-
label_
[
i
]
*
(
1
-
rho_
)
*
std
::
exp
((
1
-
rho_
)
*
score
[
i
])
+
hessians
[
i
]
=
static_cast
<
score_t
>
(
-
label_
[
i
]
*
(
1
-
rho_
)
*
std
::
exp
((
1
-
rho_
)
*
score
[
i
])
+
(
2
-
rho_
)
*
std
::
exp
((
2
-
rho_
)
*
score
[
i
]));
(
2
-
rho_
)
*
std
::
exp
((
2
-
rho_
)
*
score
[
i
]));
}
}
}
else
{
}
else
{
...
@@ -806,6 +798,7 @@ public:
...
@@ -806,6 +798,7 @@ public:
const
char
*
GetName
()
const
override
{
const
char
*
GetName
()
const
override
{
return
"tweedie"
;
return
"tweedie"
;
}
}
private:
private:
double
rho_
;
double
rho_
;
};
};
...
...
src/objective/xentropy_objective.hpp
View file @
90127b52
...
@@ -65,7 +65,6 @@ public:
...
@@ -65,7 +65,6 @@ public:
Log
::
Fatal
(
"[%s]: sum of weights is zero"
,
GetName
());
Log
::
Fatal
(
"[%s]: sum of weights is zero"
,
GetName
());
}
}
}
}
}
}
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
...
@@ -108,7 +107,7 @@ public:
...
@@ -108,7 +107,7 @@ public:
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
#pragma omp parallel for schedule(static) reduction(+:suml,
sumw)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
]
*
weights_
[
i
];
suml
+=
label_
[
i
]
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
...
@@ -161,7 +160,6 @@ public:
...
@@ -161,7 +160,6 @@ public:
Log
::
Info
(
"[%s:%s]: (objective) labels passed interval [0, 1] check"
,
GetName
(),
__func__
);
Log
::
Info
(
"[%s:%s]: (objective) labels passed interval [0, 1] check"
,
GetName
(),
__func__
);
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
Common
::
ObtainMinMaxSum
(
weights_
,
num_data_
,
&
min_weight_
,
&
max_weight_
,
(
label_t
*
)
nullptr
);
Common
::
ObtainMinMaxSum
(
weights_
,
num_data_
,
&
min_weight_
,
&
max_weight_
,
(
label_t
*
)
nullptr
);
if
(
min_weight_
<=
0.0
f
)
{
if
(
min_weight_
<=
0.0
f
)
{
Log
::
Fatal
(
"[%s]: at least one weight is non-positive"
,
GetName
());
Log
::
Fatal
(
"[%s]: at least one weight is non-positive"
,
GetName
());
...
@@ -196,7 +194,7 @@ public:
...
@@ -196,7 +194,7 @@ public:
const
double
epf
=
std
::
exp
(
score
[
i
]);
const
double
epf
=
std
::
exp
(
score
[
i
]);
const
double
hhat
=
std
::
log
(
1.0
f
+
epf
);
const
double
hhat
=
std
::
log
(
1.0
f
+
epf
);
const
double
z
=
1.0
f
-
std
::
exp
(
-
w
*
hhat
);
const
double
z
=
1.0
f
-
std
::
exp
(
-
w
*
hhat
);
const
double
enf
=
1.0
f
/
epf
;
// = std::exp(-score[i]);
const
double
enf
=
1.0
f
/
epf
;
// = std::exp(-score[i]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
1.0
f
-
y
/
z
)
*
w
/
(
1.0
f
+
enf
));
gradients
[
i
]
=
static_cast
<
score_t
>
((
1.0
f
-
y
/
z
)
*
w
/
(
1.0
f
+
enf
));
const
double
c
=
1.0
f
/
(
1.0
f
-
z
);
const
double
c
=
1.0
f
/
(
1.0
f
-
z
);
double
d
=
1.0
f
+
epf
;
double
d
=
1.0
f
+
epf
;
...
@@ -235,7 +233,7 @@ public:
...
@@ -235,7 +233,7 @@ public:
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
#pragma omp parallel for schedule(static) reduction(+:suml,
sumw)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
]
*
weights_
[
i
];
suml
+=
label_
[
i
]
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
...
...
src/treelearner/data_parallel_tree_learner.cpp
View file @
90127b52
...
@@ -14,7 +14,6 @@ DataParallelTreeLearner<TREELEARNER_T>::DataParallelTreeLearner(const Config* co
...
@@ -14,7 +14,6 @@ DataParallelTreeLearner<TREELEARNER_T>::DataParallelTreeLearner(const Config* co
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
DataParallelTreeLearner
<
TREELEARNER_T
>::~
DataParallelTreeLearner
()
{
DataParallelTreeLearner
<
TREELEARNER_T
>::~
DataParallelTreeLearner
()
{
}
}
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
...
...
src/treelearner/data_partition.hpp
View file @
90127b52
...
@@ -48,7 +48,6 @@ public:
...
@@ -48,7 +48,6 @@ public:
temp_right_indices_
.
resize
(
num_data_
);
temp_right_indices_
.
resize
(
num_data_
);
}
}
~
DataPartition
()
{
~
DataPartition
()
{
}
}
/*!
/*!
...
...
src/treelearner/feature_histogram.hpp
View file @
90127b52
...
@@ -9,8 +9,7 @@
...
@@ -9,8 +9,7 @@
#include <cstring>
#include <cstring>
#include <cmath>
#include <cmath>
namespace
LightGBM
namespace
LightGBM
{
{
class
FeatureMetainfo
{
class
FeatureMetainfo
{
public:
public:
...
@@ -83,7 +82,6 @@ public:
...
@@ -83,7 +82,6 @@ public:
void
FindBestThresholdNumerical
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
void
FindBestThresholdNumerical
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
SplitInfo
*
output
)
{
SplitInfo
*
output
)
{
is_splittable_
=
false
;
is_splittable_
=
false
;
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
meta_
->
config
->
lambda_l1
,
meta_
->
config
->
lambda_l2
,
meta_
->
config
->
max_delta_step
);
meta_
->
config
->
lambda_l1
,
meta_
->
config
->
lambda_l2
,
meta_
->
config
->
max_delta_step
);
...
@@ -118,7 +116,7 @@ public:
...
@@ -118,7 +116,7 @@ public:
double
best_sum_left_gradient
=
0
;
double
best_sum_left_gradient
=
0
;
double
best_sum_left_hessian
=
0
;
double
best_sum_left_hessian
=
0
;
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
meta_
->
config
->
lambda_l1
,
meta_
->
config
->
lambda_l2
,
meta_
->
config
->
max_delta_step
);
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
meta_
->
config
->
lambda_l1
,
meta_
->
config
->
lambda_l2
,
meta_
->
config
->
max_delta_step
);
double
min_gain_shift
=
gain_shift
+
meta_
->
config
->
min_gain_to_split
;
double
min_gain_shift
=
gain_shift
+
meta_
->
config
->
min_gain_to_split
;
bool
is_full_categorical
=
meta_
->
missing_type
==
MissingType
::
None
;
bool
is_full_categorical
=
meta_
->
missing_type
==
MissingType
::
None
;
int
used_bin
=
meta_
->
num_bin
-
1
+
is_full_categorical
;
int
used_bin
=
meta_
->
num_bin
-
1
+
is_full_categorical
;
...
@@ -336,7 +334,7 @@ public:
...
@@ -336,7 +334,7 @@ public:
output
->
gain
=
kMinScore
;
output
->
gain
=
kMinScore
;
Log
::
Warning
(
"'Forced Split' will be ignored since the gain getting worse. "
);
Log
::
Warning
(
"'Forced Split' will be ignored since the gain getting worse. "
);
return
;
return
;
}
;
}
// update split information
// update split information
output
->
threshold
=
threshold
;
output
->
threshold
=
threshold
;
...
@@ -452,7 +450,6 @@ public:
...
@@ -452,7 +450,6 @@ public:
}
}
private:
private:
static
double
GetSplitGains
(
double
sum_left_gradients
,
double
sum_left_hessians
,
static
double
GetSplitGains
(
double
sum_left_gradients
,
double
sum_left_hessians
,
double
sum_right_gradients
,
double
sum_right_hessians
,
double
sum_right_gradients
,
double
sum_right_hessians
,
double
l1
,
double
l2
,
double
max_delta_step
,
double
l1
,
double
l2
,
double
max_delta_step
,
...
@@ -502,7 +499,6 @@ private:
...
@@ -502,7 +499,6 @@ private:
void
FindBestThresholdSequence
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
void
FindBestThresholdSequence
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
double
min_gain_shift
,
SplitInfo
*
output
,
int
dir
,
bool
skip_default_bin
,
bool
use_na_as_missing
)
{
double
min_gain_shift
,
SplitInfo
*
output
,
int
dir
,
bool
skip_default_bin
,
bool
use_na_as_missing
)
{
const
int8_t
bias
=
meta_
->
bias
;
const
int8_t
bias
=
meta_
->
bias
;
double
best_sum_left_gradient
=
NAN
;
double
best_sum_left_gradient
=
NAN
;
...
@@ -512,7 +508,6 @@ private:
...
@@ -512,7 +508,6 @@ private:
uint32_t
best_threshold
=
static_cast
<
uint32_t
>
(
meta_
->
num_bin
);
uint32_t
best_threshold
=
static_cast
<
uint32_t
>
(
meta_
->
num_bin
);
if
(
dir
==
-
1
)
{
if
(
dir
==
-
1
)
{
double
sum_right_gradient
=
0.0
f
;
double
sum_right_gradient
=
0.0
f
;
double
sum_right_hessian
=
kEpsilon
;
double
sum_right_hessian
=
kEpsilon
;
data_size_t
right_count
=
0
;
data_size_t
right_count
=
0
;
...
@@ -522,7 +517,6 @@ private:
...
@@ -522,7 +517,6 @@ private:
// from right to left, and we don't need data in bin0
// from right to left, and we don't need data in bin0
for
(;
t
>=
t_end
;
--
t
)
{
for
(;
t
>=
t_end
;
--
t
)
{
// need to skip default bin
// need to skip default bin
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
...
@@ -581,7 +575,6 @@ private:
...
@@ -581,7 +575,6 @@ private:
}
}
for
(;
t
<=
t_end
;
++
t
)
{
for
(;
t
<=
t_end
;
++
t
)
{
// need to skip default bin
// need to skip default bin
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
t
>=
0
)
{
if
(
t
>=
0
)
{
...
@@ -645,7 +638,7 @@ private:
...
@@ -645,7 +638,7 @@ private:
const
FeatureMetainfo
*
meta_
;
const
FeatureMetainfo
*
meta_
;
/*! \brief sum of gradient of each bin */
/*! \brief sum of gradient of each bin */
HistogramBinEntry
*
data_
;
HistogramBinEntry
*
data_
;
//std::vector<HistogramBinEntry> data_;
//
std::vector<HistogramBinEntry> data_;
bool
is_splittable_
=
true
;
bool
is_splittable_
=
true
;
std
::
function
<
void
(
double
,
double
,
data_size_t
,
double
,
double
,
SplitInfo
*
)
>
find_best_threshold_fun_
;
std
::
function
<
void
(
double
,
double
,
data_size_t
,
double
,
double
,
SplitInfo
*
)
>
find_best_threshold_fun_
;
...
@@ -701,7 +694,7 @@ public:
...
@@ -701,7 +694,7 @@ public:
if
(
feature_metas_
.
empty
())
{
if
(
feature_metas_
.
empty
())
{
int
num_feature
=
train_data
->
num_features
();
int
num_feature
=
train_data
->
num_features
();
feature_metas_
.
resize
(
num_feature
);
feature_metas_
.
resize
(
num_feature
);
#pragma omp parallel for schedule(static, 512) if(num_feature >= 1024)
#pragma omp parallel for schedule(static, 512) if
(num_feature >= 1024)
for
(
int
i
=
0
;
i
<
num_feature
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_feature
;
++
i
)
{
feature_metas_
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
feature_metas_
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
feature_metas_
[
i
].
default_bin
=
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
();
feature_metas_
[
i
].
default_bin
=
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
();
...
@@ -751,7 +744,7 @@ public:
...
@@ -751,7 +744,7 @@ public:
void
ResetConfig
(
const
Config
*
config
)
{
void
ResetConfig
(
const
Config
*
config
)
{
int
size
=
static_cast
<
int
>
(
feature_metas_
.
size
());
int
size
=
static_cast
<
int
>
(
feature_metas_
.
size
());
#pragma omp parallel for schedule(static, 512) if(size >= 1024)
#pragma omp parallel for schedule(static, 512) if
(size >= 1024)
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
feature_metas_
[
i
].
config
=
config
;
feature_metas_
[
i
].
config
=
config
;
}
}
...
@@ -772,7 +765,7 @@ public:
...
@@ -772,7 +765,7 @@ public:
last_used_time_
[
slot
]
=
++
cur_time_
;
last_used_time_
[
slot
]
=
++
cur_time_
;
return
true
;
return
true
;
}
else
{
}
else
{
// choose the least used slot
// choose the least used slot
int
slot
=
static_cast
<
int
>
(
ArrayArgs
<
int
>::
ArgMin
(
last_used_time_
));
int
slot
=
static_cast
<
int
>
(
ArrayArgs
<
int
>::
ArgMin
(
last_used_time_
));
*
out
=
pool_
[
slot
].
get
();
*
out
=
pool_
[
slot
].
get
();
last_used_time_
[
slot
]
=
++
cur_time_
;
last_used_time_
[
slot
]
=
++
cur_time_
;
...
@@ -810,6 +803,7 @@ public:
...
@@ -810,6 +803,7 @@ public:
last_used_time_
[
slot
]
=
++
cur_time_
;
last_used_time_
[
slot
]
=
++
cur_time_
;
inverse_mapper_
[
slot
]
=
dst_idx
;
inverse_mapper_
[
slot
]
=
dst_idx
;
}
}
private:
private:
std
::
vector
<
std
::
unique_ptr
<
FeatureHistogram
[]
>>
pool_
;
std
::
vector
<
std
::
unique_ptr
<
FeatureHistogram
[]
>>
pool_
;
std
::
vector
<
std
::
vector
<
HistogramBinEntry
>>
data_
;
std
::
vector
<
std
::
vector
<
HistogramBinEntry
>>
data_
;
...
...
src/treelearner/feature_parallel_tree_learner.cpp
View file @
90127b52
...
@@ -14,7 +14,6 @@ FeatureParallelTreeLearner<TREELEARNER_T>::FeatureParallelTreeLearner(const Conf
...
@@ -14,7 +14,6 @@ FeatureParallelTreeLearner<TREELEARNER_T>::FeatureParallelTreeLearner(const Conf
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
FeatureParallelTreeLearner
<
TREELEARNER_T
>::~
FeatureParallelTreeLearner
()
{
FeatureParallelTreeLearner
<
TREELEARNER_T
>::~
FeatureParallelTreeLearner
()
{
}
}
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
...
...
src/treelearner/gpu_tree_learner.cpp
View file @
90127b52
...
@@ -56,15 +56,14 @@ void PrintHistograms(HistogramBinEntry* h, size_t size) {
...
@@ -56,15 +56,14 @@ void PrintHistograms(HistogramBinEntry* h, size_t size) {
printf
(
"
\n
Total examples: %lu
\n
"
,
total
);
printf
(
"
\n
Total examples: %lu
\n
"
,
total
);
}
}
union
Float_t
union
Float_t
{
{
int64_t
i
;
int64_t
i
;
double
f
;
double
f
;
static
int64_t
ulp_diff
(
Float_t
a
,
Float_t
b
)
{
static
int64_t
ulp_diff
(
Float_t
a
,
Float_t
b
)
{
return
abs
(
a
.
i
-
b
.
i
);
return
abs
(
a
.
i
-
b
.
i
);
}
}
};
};
void
CompareHistograms
(
HistogramBinEntry
*
h1
,
HistogramBinEntry
*
h2
,
size_t
size
,
int
feature_id
)
{
void
CompareHistograms
(
HistogramBinEntry
*
h1
,
HistogramBinEntry
*
h2
,
size_t
size
,
int
feature_id
)
{
size_t
i
;
size_t
i
;
...
@@ -144,7 +143,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
...
@@ -144,7 +143,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
printf
(
"Setting exp_workgroups_per_feature to %d, using %u work groups
\n
"
,
exp_workgroups_per_feature
,
num_workgroups
);
printf
(
"Setting exp_workgroups_per_feature to %d, using %u work groups
\n
"
,
exp_workgroups_per_feature
,
num_workgroups
);
printf
(
"Constructing histogram with %d examples
\n
"
,
leaf_num_data
);
printf
(
"Constructing histogram with %d examples
\n
"
,
leaf_num_data
);
#endif
#endif
// the GPU kernel will process all features in one call, and each
// the GPU kernel will process all features in one call, and each
// 2^exp_workgroups_per_feature (compile time constant) workgroup will
// 2^exp_workgroups_per_feature (compile time constant) workgroup will
// process one feature4 tuple
// process one feature4 tuple
...
@@ -184,7 +183,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
...
@@ -184,7 +183,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
// copy the results asynchronously. Size depends on if double precision is used
// copy the results asynchronously. Size depends on if double precision is used
size_t
output_size
=
num_dense_feature4_
*
dword_features_
*
device_bin_size_
*
hist_bin_entry_sz_
;
size_t
output_size
=
num_dense_feature4_
*
dword_features_
*
device_bin_size_
*
hist_bin_entry_sz_
;
boost
::
compute
::
event
histogram_wait_event
;
boost
::
compute
::
event
histogram_wait_event
;
host_histogram_outputs_
=
(
void
*
)
queue_
.
enqueue_map_buffer_async
(
device_histogram_outputs_
,
boost
::
compute
::
command_queue
::
map_read
,
host_histogram_outputs_
=
(
void
*
)
queue_
.
enqueue_map_buffer_async
(
device_histogram_outputs_
,
boost
::
compute
::
command_queue
::
map_read
,
0
,
output_size
,
histogram_wait_event
,
kernel_wait_obj_
);
0
,
output_size
,
histogram_wait_event
,
kernel_wait_obj_
);
// we will wait for this object in WaitAndGetHistograms
// we will wait for this object in WaitAndGetHistograms
histograms_wait_obj_
=
boost
::
compute
::
wait_list
(
histogram_wait_event
);
histograms_wait_obj_
=
boost
::
compute
::
wait_list
(
histogram_wait_event
);
...
@@ -196,13 +195,13 @@ void GPUTreeLearner::WaitAndGetHistograms(HistogramBinEntry* histograms) {
...
@@ -196,13 +195,13 @@ void GPUTreeLearner::WaitAndGetHistograms(HistogramBinEntry* histograms) {
// when the output is ready, the computation is done
// when the output is ready, the computation is done
histograms_wait_obj_
.
wait
();
histograms_wait_obj_
.
wait
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
if
(
!
feature_masks_
[
i
])
{
if
(
!
feature_masks_
[
i
])
{
continue
;
continue
;
}
}
int
dense_group_index
=
dense_feature_group_map_
[
i
];
int
dense_group_index
=
dense_feature_group_map_
[
i
];
auto
old_histogram_array
=
histograms
+
train_data_
->
GroupBinBoundary
(
dense_group_index
);
auto
old_histogram_array
=
histograms
+
train_data_
->
GroupBinBoundary
(
dense_group_index
);
int
bin_size
=
train_data_
->
FeatureGroupNumBin
(
dense_group_index
);
int
bin_size
=
train_data_
->
FeatureGroupNumBin
(
dense_group_index
);
if
(
device_bin_mults_
[
i
]
==
1
)
{
if
(
device_bin_mults_
[
i
]
==
1
)
{
for
(
int
j
=
0
;
j
<
bin_size
;
++
j
)
{
for
(
int
j
=
0
;
j
<
bin_size
;
++
j
)
{
old_histogram_array
[
j
].
sum_gradients
=
hist_outputs
[
i
*
device_bin_size_
+
j
].
sum_gradients
;
old_histogram_array
[
j
].
sum_gradients
=
hist_outputs
[
i
*
device_bin_size_
+
j
].
sum_gradients
;
...
@@ -265,36 +264,36 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -265,36 +264,36 @@ void GPUTreeLearner::AllocateGPUMemory() {
if
(
ptr_pinned_feature_masks_
)
{
if
(
ptr_pinned_feature_masks_
)
{
queue_
.
enqueue_unmap_buffer
(
pinned_feature_masks_
,
ptr_pinned_feature_masks_
);
queue_
.
enqueue_unmap_buffer
(
pinned_feature_masks_
,
ptr_pinned_feature_masks_
);
}
}
// make ordered_gradients and hessians larger (including extra room for prefetching), and pin them
// make ordered_gradients and hessians larger (including extra room for prefetching), and pin them
ordered_gradients_
.
reserve
(
allocated_num_data_
);
ordered_gradients_
.
reserve
(
allocated_num_data_
);
ordered_hessians_
.
reserve
(
allocated_num_data_
);
ordered_hessians_
.
reserve
(
allocated_num_data_
);
pinned_gradients_
=
boost
::
compute
::
buffer
();
// deallocate
pinned_gradients_
=
boost
::
compute
::
buffer
();
// deallocate
pinned_gradients_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
pinned_gradients_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
ordered_gradients_
.
data
());
ordered_gradients_
.
data
());
ptr_pinned_gradients_
=
queue_
.
enqueue_map_buffer
(
pinned_gradients_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
ptr_pinned_gradients_
=
queue_
.
enqueue_map_buffer
(
pinned_gradients_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
0
,
allocated_num_data_
*
sizeof
(
score_t
));
0
,
allocated_num_data_
*
sizeof
(
score_t
));
pinned_hessians_
=
boost
::
compute
::
buffer
();
// deallocate
pinned_hessians_
=
boost
::
compute
::
buffer
();
// deallocate
pinned_hessians_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
pinned_hessians_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
ordered_hessians_
.
data
());
ordered_hessians_
.
data
());
ptr_pinned_hessians_
=
queue_
.
enqueue_map_buffer
(
pinned_hessians_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
ptr_pinned_hessians_
=
queue_
.
enqueue_map_buffer
(
pinned_hessians_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
0
,
allocated_num_data_
*
sizeof
(
score_t
));
0
,
allocated_num_data_
*
sizeof
(
score_t
));
// allocate space for gradients and hessians on device
// allocate space for gradients and hessians on device
// we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
// we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
device_gradients_
=
boost
::
compute
::
buffer
();
// deallocate
device_gradients_
=
boost
::
compute
::
buffer
();
// deallocate
device_gradients_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
device_gradients_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
device_hessians_
=
boost
::
compute
::
buffer
();
// deallocate
device_hessians_
=
boost
::
compute
::
buffer
();
// deallocate
device_hessians_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
device_hessians_
=
boost
::
compute
::
buffer
(
ctx_
,
allocated_num_data_
*
sizeof
(
score_t
),
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
// allocate feature mask, for disabling some feature-groups' histogram calculation
// allocate feature mask, for disabling some feature-groups' histogram calculation
feature_masks_
.
resize
(
num_dense_feature4_
*
dword_features_
);
feature_masks_
.
resize
(
num_dense_feature4_
*
dword_features_
);
device_feature_masks_
=
boost
::
compute
::
buffer
();
// deallocate
device_feature_masks_
=
boost
::
compute
::
buffer
();
// deallocate
device_feature_masks_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
,
device_feature_masks_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
,
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
boost
::
compute
::
memory_object
::
read_only
,
nullptr
);
pinned_feature_masks_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
,
pinned_feature_masks_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
,
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
feature_masks_
.
data
());
feature_masks_
.
data
());
ptr_pinned_feature_masks_
=
queue_
.
enqueue_map_buffer
(
pinned_feature_masks_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
ptr_pinned_feature_masks_
=
queue_
.
enqueue_map_buffer
(
pinned_feature_masks_
,
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
0
,
num_dense_feature4_
*
dword_features_
);
0
,
num_dense_feature4_
*
dword_features_
);
...
@@ -320,7 +319,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -320,7 +319,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
boost
::
compute
::
fill
(
sync_counters_
->
begin
(),
sync_counters_
->
end
(),
0
,
queue_
);
boost
::
compute
::
fill
(
sync_counters_
->
begin
(),
sync_counters_
->
end
(),
0
,
queue_
);
// The output buffer is allocated to host directly, to overlap compute and data transfer
// The output buffer is allocated to host directly, to overlap compute and data transfer
device_histogram_outputs_
=
boost
::
compute
::
buffer
();
// deallocate
device_histogram_outputs_
=
boost
::
compute
::
buffer
();
// deallocate
device_histogram_outputs_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
*
device_bin_size_
*
hist_bin_entry_sz_
,
device_histogram_outputs_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
*
device_bin_size_
*
hist_bin_entry_sz_
,
boost
::
compute
::
memory_object
::
write_only
|
boost
::
compute
::
memory_object
::
alloc_host_ptr
,
nullptr
);
boost
::
compute
::
memory_object
::
write_only
|
boost
::
compute
::
memory_object
::
alloc_host_ptr
,
nullptr
);
// find the dense feature-groups and group them into Feature4 data structure (several feature-groups packed into 4 bytes)
// find the dense feature-groups and group them into Feature4 data structure (several feature-groups packed into 4 bytes)
int
k
=
0
,
copied_feature4
=
0
;
int
k
=
0
,
copied_feature4
=
0
;
...
@@ -342,7 +341,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -342,7 +341,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
else
{
else
{
sparse_feature_group_map_
.
push_back
(
i
);
sparse_feature_group_map_
.
push_back
(
i
);
}
}
// found
// found
if
(
k
==
dword_features_
)
{
if
(
k
==
dword_features_
)
{
k
=
0
;
k
=
0
;
for
(
int
j
=
0
;
j
<
dword_features_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
dword_features_
;
++
j
)
{
...
@@ -362,8 +361,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -362,8 +361,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
// preallocate arrays for all threads, and pin them
// preallocate arrays for all threads, and pin them
for
(
int
i
=
0
;
i
<
nthreads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
nthreads
;
++
i
)
{
host4_vecs
[
i
]
=
(
Feature4
*
)
boost
::
alignment
::
aligned_alloc
(
4096
,
num_data_
*
sizeof
(
Feature4
));
host4_vecs
[
i
]
=
(
Feature4
*
)
boost
::
alignment
::
aligned_alloc
(
4096
,
num_data_
*
sizeof
(
Feature4
));
host4_bufs
[
i
]
=
boost
::
compute
::
buffer
(
ctx_
,
num_data_
*
sizeof
(
Feature4
),
host4_bufs
[
i
]
=
boost
::
compute
::
buffer
(
ctx_
,
num_data_
*
sizeof
(
Feature4
),
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
boost
::
compute
::
memory_object
::
read_write
|
boost
::
compute
::
memory_object
::
use_host_ptr
,
host4_vecs
[
i
]);
host4_vecs
[
i
]);
host4_ptrs
[
i
]
=
(
Feature4
*
)
queue_
.
enqueue_map_buffer
(
host4_bufs
[
i
],
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
host4_ptrs
[
i
]
=
(
Feature4
*
)
queue_
.
enqueue_map_buffer
(
host4_bufs
[
i
],
boost
::
compute
::
command_queue
::
map_write_invalidate_region
,
0
,
num_data_
*
sizeof
(
Feature4
));
0
,
num_data_
*
sizeof
(
Feature4
));
...
@@ -402,13 +401,13 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -402,13 +401,13 @@ void GPUTreeLearner::AllocateGPUMemory() {
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iters
[
6
]),
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iters
[
6
]),
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iters
[
7
])};
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iters
[
7
])};
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
0
]
=
(
uint8_t
)((
iters
[
0
].
RawGet
(
j
)
*
dev_bin_mult
[
0
]
+
((
j
+
0
)
&
(
dev_bin_mult
[
0
]
-
1
)))
host4
[
j
].
s
[
0
]
=
(
uint8_t
)((
iters
[
0
].
RawGet
(
j
)
*
dev_bin_mult
[
0
]
+
((
j
+
0
)
&
(
dev_bin_mult
[
0
]
-
1
)))
|
((
iters
[
1
].
RawGet
(
j
)
*
dev_bin_mult
[
1
]
+
((
j
+
1
)
&
(
dev_bin_mult
[
1
]
-
1
)))
<<
4
));
|
((
iters
[
1
].
RawGet
(
j
)
*
dev_bin_mult
[
1
]
+
((
j
+
1
)
&
(
dev_bin_mult
[
1
]
-
1
)))
<<
4
));
host4
[
j
].
s
[
1
]
=
(
uint8_t
)((
iters
[
2
].
RawGet
(
j
)
*
dev_bin_mult
[
2
]
+
((
j
+
2
)
&
(
dev_bin_mult
[
2
]
-
1
)))
host4
[
j
].
s
[
1
]
=
(
uint8_t
)((
iters
[
2
].
RawGet
(
j
)
*
dev_bin_mult
[
2
]
+
((
j
+
2
)
&
(
dev_bin_mult
[
2
]
-
1
)))
|
((
iters
[
3
].
RawGet
(
j
)
*
dev_bin_mult
[
3
]
+
((
j
+
3
)
&
(
dev_bin_mult
[
3
]
-
1
)))
<<
4
));
|
((
iters
[
3
].
RawGet
(
j
)
*
dev_bin_mult
[
3
]
+
((
j
+
3
)
&
(
dev_bin_mult
[
3
]
-
1
)))
<<
4
));
host4
[
j
].
s
[
2
]
=
(
uint8_t
)((
iters
[
4
].
RawGet
(
j
)
*
dev_bin_mult
[
4
]
+
((
j
+
4
)
&
(
dev_bin_mult
[
4
]
-
1
)))
host4
[
j
].
s
[
2
]
=
(
uint8_t
)((
iters
[
4
].
RawGet
(
j
)
*
dev_bin_mult
[
4
]
+
((
j
+
4
)
&
(
dev_bin_mult
[
4
]
-
1
)))
|
((
iters
[
5
].
RawGet
(
j
)
*
dev_bin_mult
[
5
]
+
((
j
+
5
)
&
(
dev_bin_mult
[
5
]
-
1
)))
<<
4
));
|
((
iters
[
5
].
RawGet
(
j
)
*
dev_bin_mult
[
5
]
+
((
j
+
5
)
&
(
dev_bin_mult
[
5
]
-
1
)))
<<
4
));
host4
[
j
].
s
[
3
]
=
(
uint8_t
)((
iters
[
6
].
RawGet
(
j
)
*
dev_bin_mult
[
6
]
+
((
j
+
6
)
&
(
dev_bin_mult
[
6
]
-
1
)))
host4
[
j
].
s
[
3
]
=
(
uint8_t
)((
iters
[
6
].
RawGet
(
j
)
*
dev_bin_mult
[
6
]
+
((
j
+
6
)
&
(
dev_bin_mult
[
6
]
-
1
)))
|
((
iters
[
7
].
RawGet
(
j
)
*
dev_bin_mult
[
7
]
+
((
j
+
7
)
&
(
dev_bin_mult
[
7
]
-
1
)))
<<
4
));
|
((
iters
[
7
].
RawGet
(
j
)
*
dev_bin_mult
[
7
]
+
((
j
+
7
)
&
(
dev_bin_mult
[
7
]
-
1
)))
<<
4
));
}
}
}
}
...
@@ -432,7 +431,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -432,7 +431,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
}
}
else
{
else
{
Log
::
Fatal
(
"Bug in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
Log
::
Fatal
(
"Bug in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
}
}
}
}
}
}
...
@@ -481,7 +480,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -481,7 +480,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
}
}
...
@@ -489,12 +488,12 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -489,12 +488,12 @@ void GPUTreeLearner::AllocateGPUMemory() {
Dense4bitsBinIterator
iter
=
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
);
Dense4bitsBinIterator
iter
=
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
}
}
else
{
else
{
Log
::
Fatal
(
"BUG in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
Log
::
Fatal
(
"BUG in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
}
}
}
}
else
{
else
{
...
@@ -538,8 +537,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -538,8 +537,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
// data transfer time
// data transfer time
std
::
chrono
::
duration
<
double
,
std
::
milli
>
end_time
=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
end_time
=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
Log
::
Info
(
"%d dense feature groups (%.2f MB) transferred to GPU in %f secs. %d sparse feature groups"
,
Log
::
Info
(
"%d dense feature groups (%.2f MB) transferred to GPU in %f secs. %d sparse feature groups"
,
dense_feature_group_map_
.
size
(),
((
dense_feature_group_map_
.
size
()
+
(
dword_features_
-
1
))
/
dword_features_
)
*
num_data_
*
sizeof
(
Feature4
)
/
(
1024.0
*
1024.0
),
dense_feature_group_map_
.
size
(),
((
dense_feature_group_map_
.
size
()
+
(
dword_features_
-
1
))
/
dword_features_
)
*
num_data_
*
sizeof
(
Feature4
)
/
(
1024.0
*
1024.0
),
end_time
*
1e-3
,
sparse_feature_group_map_
.
size
());
end_time
*
1e-3
,
sparse_feature_group_map_
.
size
());
#if GPU_DEBUG >= 1
#if GPU_DEBUG >= 1
printf
(
"Dense feature group list (size %lu): "
,
dense_feature_group_map_
.
size
());
printf
(
"Dense feature group list (size %lu): "
,
dense_feature_group_map_
.
size
());
...
@@ -596,7 +595,7 @@ void GPUTreeLearner::BuildGPUKernels() {
...
@@ -596,7 +595,7 @@ void GPUTreeLearner::BuildGPUKernels() {
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
boost
::
compute
::
program
program
;
boost
::
compute
::
program
program
;
std
::
ostringstream
opts
;
std
::
ostringstream
opts
;
// compile the GPU kernel depending if double precision is used, constant hessian is used, etc
// compile the GPU kernel depending if double precision is used, constant hessian is used, etc
.
opts
<<
" -D POWER_FEATURE_WORKGROUPS="
<<
i
opts
<<
" -D POWER_FEATURE_WORKGROUPS="
<<
i
<<
" -D USE_CONSTANT_BUF="
<<
use_constants
<<
" -D USE_DP_FLOAT="
<<
int
(
config_
->
gpu_use_dp
)
<<
" -D USE_CONSTANT_BUF="
<<
use_constants
<<
" -D USE_DP_FLOAT="
<<
int
(
config_
->
gpu_use_dp
)
<<
" -D CONST_HESSIAN="
<<
int
(
is_constant_hessian_
)
<<
" -D CONST_HESSIAN="
<<
int
(
is_constant_hessian_
)
...
@@ -617,7 +616,7 @@ void GPUTreeLearner::BuildGPUKernels() {
...
@@ -617,7 +616,7 @@ void GPUTreeLearner::BuildGPUKernels() {
}
}
}
}
histogram_kernels_
[
i
]
=
program
.
create_kernel
(
kernel_name_
);
histogram_kernels_
[
i
]
=
program
.
create_kernel
(
kernel_name_
);
// kernel with all features enabled, with eliminated branches
// kernel with all features enabled, with eliminated branches
opts
<<
" -D ENABLE_ALL_FEATURES=1"
;
opts
<<
" -D ENABLE_ALL_FEATURES=1"
;
try
{
try
{
...
@@ -661,7 +660,7 @@ void GPUTreeLearner::SetupKernelArguments() {
...
@@ -661,7 +660,7 @@ void GPUTreeLearner::SetupKernelArguments() {
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
// The only argument that needs to be changed later is num_data_
// The only argument that needs to be changed later is num_data_
if
(
is_constant_hessian_
)
{
if
(
is_constant_hessian_
)
{
// hessian is passed as a parameter, but it is not available now.
// hessian is passed as a parameter, but it is not available now.
// hessian will be set in BeforeTrain()
// hessian will be set in BeforeTrain()
histogram_kernels_
[
i
].
set_args
(
*
device_features_
,
device_feature_masks_
,
num_data_
,
histogram_kernels_
[
i
].
set_args
(
*
device_features_
,
device_feature_masks_
,
num_data_
,
*
device_data_indices_
,
num_data_
,
device_gradients_
,
0.0
f
,
*
device_data_indices_
,
num_data_
,
device_gradients_
,
0.0
f
,
...
@@ -711,9 +710,9 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
...
@@ -711,9 +710,9 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
if
((
int
)
platform_devices
.
size
()
>
device_id
)
{
if
((
int
)
platform_devices
.
size
()
>
device_id
)
{
Log
::
Info
(
"Using requested OpenCL platform %d device %d"
,
platform_id
,
device_id
);
Log
::
Info
(
"Using requested OpenCL platform %d device %d"
,
platform_id
,
device_id
);
dev_
=
platform_devices
[
device_id
];
dev_
=
platform_devices
[
device_id
];
}
}
}
}
}
}
// determine which kernel to use based on the max number of bins
// determine which kernel to use based on the max number of bins
if
(
max_num_bin_
<=
16
)
{
if
(
max_num_bin_
<=
16
)
{
kernel_source_
=
kernel16_src_
;
kernel_source_
=
kernel16_src_
;
...
@@ -727,7 +726,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
...
@@ -727,7 +726,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
device_bin_size_
=
64
;
device_bin_size_
=
64
;
dword_features_
=
4
;
dword_features_
=
4
;
}
}
else
if
(
max_num_bin_
<=
256
)
{
else
if
(
max_num_bin_
<=
256
)
{
kernel_source_
=
kernel256_src_
;
kernel_source_
=
kernel256_src_
;
kernel_name_
=
"histogram256"
;
kernel_name_
=
"histogram256"
;
device_bin_size_
=
256
;
device_bin_size_
=
256
;
...
@@ -736,10 +735,10 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
...
@@ -736,10 +735,10 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
else
{
else
{
Log
::
Fatal
(
"bin size %d cannot run on GPU"
,
max_num_bin_
);
Log
::
Fatal
(
"bin size %d cannot run on GPU"
,
max_num_bin_
);
}
}
if
(
max_num_bin_
==
65
)
{
if
(
max_num_bin_
==
65
)
{
Log
::
Warning
(
"Setting max_bin to 63 is sugguested for best performance"
);
Log
::
Warning
(
"Setting max_bin to 63 is sugguested for best performance"
);
}
}
if
(
max_num_bin_
==
17
)
{
if
(
max_num_bin_
==
17
)
{
Log
::
Warning
(
"Setting max_bin to 15 is sugguested for best performance"
);
Log
::
Warning
(
"Setting max_bin to 15 is sugguested for best performance"
);
}
}
ctx_
=
boost
::
compute
::
context
(
dev_
);
ctx_
=
boost
::
compute
::
context
(
dev_
);
...
@@ -774,7 +773,6 @@ void GPUTreeLearner::ResetTrainingData(const Dataset* train_data) {
...
@@ -774,7 +773,6 @@ void GPUTreeLearner::ResetTrainingData(const Dataset* train_data) {
}
}
void
GPUTreeLearner
::
BeforeTrain
()
{
void
GPUTreeLearner
::
BeforeTrain
()
{
#if GPU_DEBUG >= 2
#if GPU_DEBUG >= 2
printf
(
"Copying intial full gradients and hessians to device
\n
"
);
printf
(
"Copying intial full gradients and hessians to device
\n
"
);
#endif
#endif
...
@@ -861,7 +859,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
...
@@ -861,7 +859,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
// copy indices to the GPU:
// copy indices to the GPU:
#if GPU_DEBUG >= 2
#if GPU_DEBUG >= 2
Log
::
Info
(
"Copying indices, gradients and hessians to GPU..."
);
Log
::
Info
(
"Copying indices, gradients and hessians to GPU..."
);
printf
(
"Indices size %d being copied (left = %d, right = %d)
\n
"
,
end
-
begin
,
num_data_in_left_child
,
num_data_in_right_child
);
printf
(
"Indices size %d being copied (left = %d, right = %d)
\n
"
,
end
-
begin
,
num_data_in_left_child
,
num_data_in_right_child
);
#endif
#endif
indices_future_
=
boost
::
compute
::
copy_async
(
indices
+
begin
,
indices
+
end
,
device_data_indices_
->
begin
(),
queue_
);
indices_future_
=
boost
::
compute
::
copy_async
(
indices
+
begin
,
indices
+
end
,
device_data_indices_
->
begin
(),
queue_
);
...
@@ -893,7 +891,6 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -893,7 +891,6 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
gradients
,
const
score_t
*
hessians
,
const
score_t
*
gradients
,
const
score_t
*
hessians
,
score_t
*
ordered_gradients
,
score_t
*
ordered_hessians
)
{
score_t
*
ordered_gradients
,
score_t
*
ordered_hessians
)
{
if
(
num_data
<=
0
)
{
if
(
num_data
<=
0
)
{
return
false
;
return
false
;
}
}
...
@@ -901,7 +898,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -901,7 +898,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
if
(
!
num_dense_feature_groups_
)
{
if
(
!
num_dense_feature_groups_
)
{
return
false
;
return
false
;
}
}
// copy data indices if it is not null
// copy data indices if it is not null
if
(
data_indices
!=
nullptr
&&
num_data
!=
num_data_
)
{
if
(
data_indices
!=
nullptr
&&
num_data
!=
num_data_
)
{
indices_future_
=
boost
::
compute
::
copy_async
(
data_indices
,
data_indices
+
num_data
,
device_data_indices_
->
begin
(),
queue_
);
indices_future_
=
boost
::
compute
::
copy_async
(
data_indices
,
data_indices
+
num_data
,
device_data_indices_
->
begin
(),
queue_
);
...
@@ -934,15 +931,15 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -934,15 +931,15 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
}
}
// converted indices in is_feature_used to feature-group indices
// converted indices in is_feature_used to feature-group indices
std
::
vector
<
int8_t
>
is_feature_group_used
(
num_feature_groups_
,
0
);
std
::
vector
<
int8_t
>
is_feature_group_used
(
num_feature_groups_
,
0
);
#pragma omp parallel for schedule(static,1024) if (num_features_ >= 2048)
#pragma omp parallel for schedule(static, 1024) if (num_features_ >= 2048)
for
(
int
i
=
0
;
i
<
num_features_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_features_
;
++
i
)
{
if
(
is_feature_used
[
i
])
{
if
(
is_feature_used
[
i
])
{
is_feature_group_used
[
train_data_
->
Feature2Group
(
i
)]
=
1
;
is_feature_group_used
[
train_data_
->
Feature2Group
(
i
)]
=
1
;
}
}
}
}
// construct the feature masks for dense feature-groups
// construct the feature masks for dense feature-groups
int
used_dense_feature_groups
=
0
;
int
used_dense_feature_groups
=
0
;
#pragma omp parallel for schedule(static,1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
#pragma omp parallel for schedule(static, 1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
if
(
is_feature_group_used
[
dense_feature_group_map_
[
i
]])
{
if
(
is_feature_group_used
[
dense_feature_group_map_
[
i
]])
{
feature_masks_
[
i
]
=
1
;
feature_masks_
[
i
]
=
1
;
...
@@ -1036,7 +1033,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
...
@@ -1036,7 +1033,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
num_data
,
num_data
,
num_data
!=
num_data_
?
ordered_gradients_
.
data
()
:
gradients_
,
num_data
!=
num_data_
?
ordered_gradients_
.
data
()
:
gradients_
,
num_data
!=
num_data_
?
ordered_hessians_
.
data
()
:
hessians_
,
num_data
!=
num_data_
?
ordered_hessians_
.
data
()
:
hessians_
,
current_histogram
);
current_histogram
);
CompareHistograms
(
gpu_histogram
,
current_histogram
,
size
,
dense_feature_group_index
);
CompareHistograms
(
gpu_histogram
,
current_histogram
,
size
,
dense_feature_group_index
);
std
::
copy
(
gpu_histogram
,
gpu_histogram
+
size
,
current_histogram
);
std
::
copy
(
gpu_histogram
,
gpu_histogram
+
size
,
current_histogram
);
delete
[]
gpu_histogram
;
delete
[]
gpu_histogram
;
...
@@ -1083,7 +1080,7 @@ void GPUTreeLearner::FindBestSplits() {
...
@@ -1083,7 +1080,7 @@ void GPUTreeLearner::FindBestSplits() {
smaller_leaf_histogram_array_
[
feature_index
].
set_is_splittable
(
false
);
smaller_leaf_histogram_array_
[
feature_index
].
set_is_splittable
(
false
);
continue
;
continue
;
}
}
size_t
bin_size
=
train_data_
->
FeatureNumBin
(
feature_index
)
+
1
;
size_t
bin_size
=
train_data_
->
FeatureNumBin
(
feature_index
)
+
1
;
printf
(
"Feature %d smaller leaf:
\n
"
,
feature_index
);
printf
(
"Feature %d smaller leaf:
\n
"
,
feature_index
);
PrintHistograms
(
smaller_leaf_histogram_array_
[
feature_index
].
RawData
()
-
1
,
bin_size
);
PrintHistograms
(
smaller_leaf_histogram_array_
[
feature_index
].
RawData
()
-
1
,
bin_size
);
if
(
larger_leaf_splits_
==
nullptr
||
larger_leaf_splits_
->
LeafIndex
()
<
0
)
{
continue
;
}
if
(
larger_leaf_splits_
==
nullptr
||
larger_leaf_splits_
->
LeafIndex
()
<
0
)
{
continue
;
}
...
@@ -1124,4 +1121,4 @@ void GPUTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right
...
@@ -1124,4 +1121,4 @@ void GPUTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right
}
}
}
// namespace LightGBM
}
// namespace LightGBM
#endif // USE_GPU
#endif
// USE_GPU
src/treelearner/gpu_tree_learner.h
View file @
90127b52
...
@@ -63,12 +63,13 @@ protected:
...
@@ -63,12 +63,13 @@ protected:
void
FindBestSplits
()
override
;
void
FindBestSplits
()
override
;
void
Split
(
Tree
*
tree
,
int
best_Leaf
,
int
*
left_leaf
,
int
*
right_leaf
)
override
;
void
Split
(
Tree
*
tree
,
int
best_Leaf
,
int
*
left_leaf
,
int
*
right_leaf
)
override
;
void
ConstructHistograms
(
const
std
::
vector
<
int8_t
>&
is_feature_used
,
bool
use_subtract
)
override
;
void
ConstructHistograms
(
const
std
::
vector
<
int8_t
>&
is_feature_used
,
bool
use_subtract
)
override
;
private:
private:
/*! \brief 4-byte feature tuple used by GPU kernels */
/*! \brief 4-byte feature tuple used by GPU kernels */
struct
Feature4
{
struct
Feature4
{
uint8_t
s
[
4
];
uint8_t
s
[
4
];
};
};
/*! \brief Single precision histogram entry for GPU */
/*! \brief Single precision histogram entry for GPU */
struct
GPUHistogramBinEntry
{
struct
GPUHistogramBinEntry
{
score_t
sum_gradients
;
score_t
sum_gradients
;
...
@@ -82,7 +83,7 @@ private:
...
@@ -82,7 +83,7 @@ private:
* \return Log2 of the best number for workgroups per feature, in range 0...kMaxLogWorkgroupsPerFeature
* \return Log2 of the best number for workgroups per feature, in range 0...kMaxLogWorkgroupsPerFeature
*/
*/
int
GetNumWorkgroupsPerFeature
(
data_size_t
leaf_num_data
);
int
GetNumWorkgroupsPerFeature
(
data_size_t
leaf_num_data
);
/*!
/*!
* \brief Initialize GPU device, context and command queues
* \brief Initialize GPU device, context and command queues
* Also compiles the OpenCL kernel
* Also compiles the OpenCL kernel
...
@@ -100,7 +101,7 @@ private:
...
@@ -100,7 +101,7 @@ private:
* \brief Compile OpenCL GPU source code to kernel binaries
* \brief Compile OpenCL GPU source code to kernel binaries
*/
*/
void
BuildGPUKernels
();
void
BuildGPUKernels
();
/*!
/*!
* \brief Returns OpenCL kernel build log when compiled with option opts
* \brief Returns OpenCL kernel build log when compiled with option opts
* \param opts OpenCL build options
* \param opts OpenCL build options
...
@@ -120,7 +121,7 @@ private:
...
@@ -120,7 +121,7 @@ private:
* \param use_all_features Set to true to not use feature masks, with a faster kernel
* \param use_all_features Set to true to not use feature masks, with a faster kernel
*/
*/
void
GPUHistogram
(
data_size_t
leaf_num_data
,
bool
use_all_features
);
void
GPUHistogram
(
data_size_t
leaf_num_data
,
bool
use_all_features
);
/*!
/*!
* \brief Wait for GPU kernel execution and read histogram
* \brief Wait for GPU kernel execution and read histogram
* \param histograms Destination of histogram results from GPU.
* \param histograms Destination of histogram results from GPU.
...
@@ -151,7 +152,7 @@ private:
...
@@ -151,7 +152,7 @@ private:
/*! \brief Log2 of max number of workgroups per feature */
/*! \brief Log2 of max number of workgroups per feature */
const
int
kMaxLogWorkgroupsPerFeature
=
10
;
// 2^10
const
int
kMaxLogWorkgroupsPerFeature
=
10
;
// 2^10
/*! \brief Max total number of workgroups with preallocated workspace.
/*! \brief Max total number of workgroups with preallocated workspace.
* If we use more than this number of workgroups, we have to reallocate subhistograms */
* If we use more than this number of workgroups, we have to reallocate subhistograms */
int
preallocd_max_num_wg_
=
1024
;
int
preallocd_max_num_wg_
=
1024
;
...
@@ -166,15 +167,15 @@ private:
...
@@ -166,15 +167,15 @@ private:
/*! \brief GPU command queue object */
/*! \brief GPU command queue object */
boost
::
compute
::
command_queue
queue_
;
boost
::
compute
::
command_queue
queue_
;
/*! \brief GPU kernel for 256 bins */
/*! \brief GPU kernel for 256 bins */
const
char
*
kernel256_src_
=
const
char
*
kernel256_src_
=
#include "ocl/histogram256.cl"
#include "ocl/histogram256.cl"
;
;
/*! \brief GPU kernel for 64 bins */
/*! \brief GPU kernel for 64 bins */
const
char
*
kernel64_src_
=
const
char
*
kernel64_src_
=
#include "ocl/histogram64.cl"
#include "ocl/histogram64.cl"
;
;
/*! \brief GPU kernel for 16 bins */
/*! \brief GPU kernel for 16 bins */
const
char
*
kernel16_src_
=
const
char
*
kernel16_src_
=
#include "ocl/histogram16.cl"
#include "ocl/histogram16.cl"
;
;
/*! \brief Currently used kernel source */
/*! \brief Currently used kernel source */
...
@@ -266,7 +267,7 @@ private:
...
@@ -266,7 +267,7 @@ private:
// When GPU support is not compiled in, quit with an error message
// When GPU support is not compiled in, quit with an error message
namespace
LightGBM
{
namespace
LightGBM
{
class
GPUTreeLearner
:
public
SerialTreeLearner
{
class
GPUTreeLearner
:
public
SerialTreeLearner
{
public:
public:
#pragma warning(disable : 4702)
#pragma warning(disable : 4702)
...
@@ -276,7 +277,7 @@ public:
...
@@ -276,7 +277,7 @@ public:
}
}
};
};
}
}
// namespace LightGBM
#endif // USE_GPU
#endif // USE_GPU
...
...
src/treelearner/leaf_splits.hpp
View file @
90127b52
...
@@ -129,7 +129,7 @@ public:
...
@@ -129,7 +129,7 @@ public:
/*! \brief Get sum of gradients of current leaf */
/*! \brief Get sum of gradients of current leaf */
double
sum_gradients
()
const
{
return
sum_gradients_
;
}
double
sum_gradients
()
const
{
return
sum_gradients_
;
}
/*! \brief Get sum of hessians of current leaf */
/*! \brief Get sum of hessians of current leaf */
double
sum_hessians
()
const
{
return
sum_hessians_
;
}
double
sum_hessians
()
const
{
return
sum_hessians_
;
}
...
...
src/treelearner/parallel_tree_learner.h
View file @
90127b52
...
@@ -51,6 +51,7 @@ public:
...
@@ -51,6 +51,7 @@ public:
~
DataParallelTreeLearner
();
~
DataParallelTreeLearner
();
void
Init
(
const
Dataset
*
train_data
,
bool
is_constant_hessian
)
override
;
void
Init
(
const
Dataset
*
train_data
,
bool
is_constant_hessian
)
override
;
void
ResetConfig
(
const
Config
*
config
)
override
;
void
ResetConfig
(
const
Config
*
config
)
override
;
protected:
protected:
void
BeforeTrain
()
override
;
void
BeforeTrain
()
override
;
void
FindBestSplits
()
override
;
void
FindBestSplits
()
override
;
...
@@ -104,6 +105,7 @@ public:
...
@@ -104,6 +105,7 @@ public:
~
VotingParallelTreeLearner
()
{
}
~
VotingParallelTreeLearner
()
{
}
void
Init
(
const
Dataset
*
train_data
,
bool
is_constant_hessian
)
override
;
void
Init
(
const
Dataset
*
train_data
,
bool
is_constant_hessian
)
override
;
void
ResetConfig
(
const
Config
*
config
)
override
;
void
ResetConfig
(
const
Config
*
config
)
override
;
protected:
protected:
void
BeforeTrain
()
override
;
void
BeforeTrain
()
override
;
bool
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
)
override
;
bool
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
)
override
;
...
@@ -185,7 +187,7 @@ inline void SyncUpGlobalBestSplit(char* input_buffer_, char* output_buffer_, Spl
...
@@ -185,7 +187,7 @@ inline void SyncUpGlobalBestSplit(char* input_buffer_, char* output_buffer_, Spl
int
size
=
SplitInfo
::
Size
(
max_cat_threshold
);
int
size
=
SplitInfo
::
Size
(
max_cat_threshold
);
smaller_best_split
->
CopyTo
(
input_buffer_
);
smaller_best_split
->
CopyTo
(
input_buffer_
);
larger_best_split
->
CopyTo
(
input_buffer_
+
size
);
larger_best_split
->
CopyTo
(
input_buffer_
+
size
);
Network
::
Allreduce
(
input_buffer_
,
size
*
2
,
size
,
output_buffer_
,
Network
::
Allreduce
(
input_buffer_
,
size
*
2
,
size
,
output_buffer_
,
[]
(
const
char
*
src
,
char
*
dst
,
int
size
,
comm_size_t
len
)
{
[]
(
const
char
*
src
,
char
*
dst
,
int
size
,
comm_size_t
len
)
{
comm_size_t
used_size
=
0
;
comm_size_t
used_size
=
0
;
LightSplitInfo
p1
,
p2
;
LightSplitInfo
p1
,
p2
;
...
...
src/treelearner/serial_tree_learner.cpp
View file @
90127b52
...
@@ -18,7 +18,7 @@ std::chrono::duration<double, std::milli> hist_time;
...
@@ -18,7 +18,7 @@ std::chrono::duration<double, std::milli> hist_time;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
find_split_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
find_split_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
split_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
split_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
ordered_bin_time
;
std
::
chrono
::
duration
<
double
,
std
::
milli
>
ordered_bin_time
;
#endif // TIMETAG
#endif
// TIMETAG
SerialTreeLearner
::
SerialTreeLearner
(
const
Config
*
config
)
SerialTreeLearner
::
SerialTreeLearner
(
const
Config
*
config
)
:
config_
(
config
)
{
:
config_
(
config
)
{
...
@@ -253,7 +253,6 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vect
...
@@ -253,7 +253,6 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vect
}
}
void
SerialTreeLearner
::
BeforeTrain
()
{
void
SerialTreeLearner
::
BeforeTrain
()
{
// reset histogram pool
// reset histogram pool
histogram_pool_
.
ResetMap
();
histogram_pool_
.
ResetMap
();
...
@@ -322,7 +321,7 @@ void SerialTreeLearner::BeforeTrain() {
...
@@ -322,7 +321,7 @@ void SerialTreeLearner::BeforeTrain() {
const
data_size_t
*
indices
=
data_partition_
->
indices
();
const
data_size_t
*
indices
=
data_partition_
->
indices
();
data_size_t
begin
=
data_partition_
->
leaf_begin
(
0
);
data_size_t
begin
=
data_partition_
->
leaf_begin
(
0
);
data_size_t
end
=
begin
+
data_partition_
->
leaf_count
(
0
);
data_size_t
end
=
begin
+
data_partition_
->
leaf_count
(
0
);
#pragma omp parallel for schedule(static, 512) if(end - begin >= 1024)
#pragma omp parallel for schedule(static, 512) if
(end - begin >= 1024)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
}
}
...
@@ -335,7 +334,7 @@ void SerialTreeLearner::BeforeTrain() {
...
@@ -335,7 +334,7 @@ void SerialTreeLearner::BeforeTrain() {
OMP_LOOP_EX_END
();
OMP_LOOP_EX_END
();
}
}
OMP_THROW_EX
();
OMP_THROW_EX
();
#pragma omp parallel for schedule(static, 512) if(end - begin >= 1024)
#pragma omp parallel for schedule(static, 512) if
(end - begin >= 1024)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
}
}
...
@@ -401,7 +400,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
...
@@ -401,7 +400,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
end
=
begin
+
right_cnt
;
end
=
begin
+
right_cnt
;
mark
=
0
;
mark
=
0
;
}
}
#pragma omp parallel for schedule(static, 512) if(end - begin >= 1024)
#pragma omp parallel for schedule(static, 512) if
(end - begin >= 1024)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
}
}
...
@@ -414,7 +413,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
...
@@ -414,7 +413,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
OMP_LOOP_EX_END
();
OMP_LOOP_EX_END
();
}
}
OMP_THROW_EX
();
OMP_THROW_EX
();
#pragma omp parallel for schedule(static, 512) if(end - begin >= 1024)
#pragma omp parallel for schedule(static, 512) if
(end - begin >= 1024)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
}
}
...
@@ -427,7 +426,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
...
@@ -427,7 +426,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
void
SerialTreeLearner
::
FindBestSplits
()
{
void
SerialTreeLearner
::
FindBestSplits
()
{
std
::
vector
<
int8_t
>
is_feature_used
(
num_features_
,
0
);
std
::
vector
<
int8_t
>
is_feature_used
(
num_features_
,
0
);
#pragma omp parallel for schedule(static,1024) if (num_features_ >= 2048)
#pragma omp parallel for schedule(static,
1024) if (num_features_ >= 2048)
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
if
(
!
is_feature_used_
[
feature_index
])
continue
;
if
(
!
is_feature_used_
[
feature_index
])
continue
;
if
(
parent_leaf_histogram_array_
!=
nullptr
if
(
parent_leaf_histogram_array_
!=
nullptr
...
@@ -542,7 +541,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
...
@@ -542,7 +541,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
}
}
int32_t
SerialTreeLearner
::
ForceSplits
(
Tree
*
tree
,
Json
&
forced_split_json
,
int
*
left_leaf
,
int32_t
SerialTreeLearner
::
ForceSplits
(
Tree
*
tree
,
Json
&
forced_split_json
,
int
*
left_leaf
,
int
*
right_leaf
,
int
*
cur_depth
,
int
*
right_leaf
,
int
*
cur_depth
,
bool
*
aborted_last_force_split
)
{
bool
*
aborted_last_force_split
)
{
int32_t
result_count
=
0
;
int32_t
result_count
=
0
;
// start at root leaf
// start at root leaf
...
@@ -553,8 +552,7 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int*
...
@@ -553,8 +552,7 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int*
bool
left_smaller
=
true
;
bool
left_smaller
=
true
;
std
::
unordered_map
<
int
,
SplitInfo
>
forceSplitMap
;
std
::
unordered_map
<
int
,
SplitInfo
>
forceSplitMap
;
q
.
push
(
std
::
make_pair
(
forced_split_json
,
*
left_leaf
));
q
.
push
(
std
::
make_pair
(
forced_split_json
,
*
left_leaf
));
while
(
!
q
.
empty
())
{
while
(
!
q
.
empty
())
{
// before processing next node from queue, store info for current left/right leaf
// before processing next node from queue, store info for current left/right leaf
// store "best split" for left and right, even if they might be overwritten by forced split
// store "best split" for left and right, even if they might be overwritten by forced split
if
(
BeforeFindBestSplit
(
tree
,
*
left_leaf
,
*
right_leaf
))
{
if
(
BeforeFindBestSplit
(
tree
,
*
left_leaf
,
*
right_leaf
))
{
...
@@ -815,7 +813,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
...
@@ -815,7 +813,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
tree
->
SetLeafOutput
(
i
,
outputs
[
i
]
/
n_nozeroworker_perleaf
[
i
]);
tree
->
SetLeafOutput
(
i
,
outputs
[
i
]
/
n_nozeroworker_perleaf
[
i
]);
}
}
}
}
}
}
}
}
...
...
src/treelearner/serial_tree_learner.h
View file @
90127b52
...
@@ -103,10 +103,9 @@ protected:
...
@@ -103,10 +103,9 @@ protected:
/* Force splits with forced_split_json dict and then return num splits forced.*/
/* Force splits with forced_split_json dict and then return num splits forced.*/
virtual
int32_t
ForceSplits
(
Tree
*
tree
,
Json
&
forced_split_json
,
int
*
left_leaf
,
virtual
int32_t
ForceSplits
(
Tree
*
tree
,
Json
&
forced_split_json
,
int
*
left_leaf
,
int
*
right_leaf
,
int
*
cur_depth
,
int
*
right_leaf
,
int
*
cur_depth
,
bool
*
aborted_last_force_split
);
bool
*
aborted_last_force_split
);
/*!
/*!
* \brief Get the number of data in a leaf
* \brief Get the number of data in a leaf
* \param leaf_idx The index of leaf
* \param leaf_idx The index of leaf
...
...
src/treelearner/split_info.hpp
View file @
90127b52
...
@@ -185,7 +185,6 @@ public:
...
@@ -185,7 +185,6 @@ public:
return
local_feature
==
other_feature
;
return
local_feature
==
other_feature
;
}
}
}
}
};
};
struct
LightSplitInfo
{
struct
LightSplitInfo
{
...
@@ -280,7 +279,6 @@ public:
...
@@ -280,7 +279,6 @@ public:
return
local_feature
==
other_feature
;
return
local_feature
==
other_feature
;
}
}
}
}
};
};
}
// namespace LightGBM
}
// namespace LightGBM
...
...
src/treelearner/voting_parallel_tree_learner.cpp
View file @
90127b52
...
@@ -370,7 +370,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
...
@@ -370,7 +370,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
void
VotingParallelTreeLearner
<
TREELEARNER_T
>::
FindBestSplitsFromHistograms
(
const
std
::
vector
<
int8_t
>&
,
bool
)
{
void
VotingParallelTreeLearner
<
TREELEARNER_T
>::
FindBestSplitsFromHistograms
(
const
std
::
vector
<
int8_t
>&
,
bool
)
{
std
::
vector
<
SplitInfo
>
smaller_bests_per_thread
(
this
->
num_threads_
);
std
::
vector
<
SplitInfo
>
smaller_bests_per_thread
(
this
->
num_threads_
);
std
::
vector
<
SplitInfo
>
larger_best_per_thread
(
this
->
num_threads_
);
std
::
vector
<
SplitInfo
>
larger_best_per_thread
(
this
->
num_threads_
);
// find best split from local aggregated histograms
// find best split from local aggregated histograms
...
@@ -506,4 +505,4 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf,
...
@@ -506,4 +505,4 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf,
// instantiate template classes, otherwise linker cannot find the code
// instantiate template classes, otherwise linker cannot find the code
template
class
VotingParallelTreeLearner
<
GPUTreeLearner
>;
template
class
VotingParallelTreeLearner
<
GPUTreeLearner
>;
template
class
VotingParallelTreeLearner
<
SerialTreeLearner
>;
template
class
VotingParallelTreeLearner
<
SerialTreeLearner
>;
}
// namespace
FTLBoost
}
// namespace
LightGBM
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment