Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
3c999be3
Unverified
Commit
3c999be3
authored
Mar 26, 2019
by
Nikita Titov
Committed by
GitHub
Mar 26, 2019
Browse files
fixed cpplint error about spaces and newlines (#2068)
parent
823fc03c
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
98 additions
and
144 deletions
+98
-144
include/LightGBM/dataset_loader.h
include/LightGBM/dataset_loader.h
+1
-1
include/LightGBM/feature_group.h
include/LightGBM/feature_group.h
+2
-2
include/LightGBM/json11.hpp
include/LightGBM/json11.hpp
+1
-1
include/LightGBM/tree.h
include/LightGBM/tree.h
+1
-1
include/LightGBM/utils/common.h
include/LightGBM/utils/common.h
+1
-2
include/LightGBM/utils/text_reader.h
include/LightGBM/utils/text_reader.h
+7
-14
src/boosting/gbdt_model_text.cpp
src/boosting/gbdt_model_text.cpp
+4
-8
src/c_api.cpp
src/c_api.cpp
+2
-3
src/io/dataset.cpp
src/io/dataset.cpp
+30
-31
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+2
-4
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+1
-1
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+2
-2
src/io/json11.cpp
src/io/json11.cpp
+8
-7
src/io/parser.cpp
src/io/parser.cpp
+2
-4
src/io/parser.hpp
src/io/parser.hpp
+1
-2
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+2
-2
src/objective/regression_objective.hpp
src/objective/regression_objective.hpp
+1
-1
src/objective/xentropy_objective.hpp
src/objective/xentropy_objective.hpp
+1
-1
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+28
-55
src/treelearner/tree_learner.cpp
src/treelearner/tree_learner.cpp
+1
-2
No files found.
include/LightGBM/dataset_loader.h
View file @
3c999be3
...
...
@@ -73,6 +73,6 @@ class DatasetLoader {
std
::
unordered_set
<
int
>
categorical_features_
;
};
}
}
// namespace LightGBM
#endif // LIGHTGBM_DATASET_LOADER_H_
include/LightGBM/feature_group.h
View file @
3c999be3
...
...
@@ -212,14 +212,14 @@ class FeatureGroup {
/*! \brief Disable copy */
FeatureGroup
&
operator
=
(
const
FeatureGroup
&
)
=
delete
;
/*! \brief Deep copy */
FeatureGroup
(
const
FeatureGroup
&
other
){
FeatureGroup
(
const
FeatureGroup
&
other
)
{
num_feature_
=
other
.
num_feature_
;
is_sparse_
=
other
.
is_sparse_
;
num_total_bin_
=
other
.
num_total_bin_
;
bin_offsets_
=
other
.
bin_offsets_
;
bin_mappers_
.
reserve
(
other
.
bin_mappers_
.
size
());
for
(
auto
&
bin_mapper
:
other
.
bin_mappers_
){
for
(
auto
&
bin_mapper
:
other
.
bin_mappers_
)
{
bin_mappers_
.
emplace_back
(
new
BinMapper
(
*
bin_mapper
));
}
...
...
include/LightGBM/json11.hpp
View file @
3c999be3
...
...
@@ -77,7 +77,7 @@ enum JsonParse {
class
JsonValue
;
class
Json
final
{
public:
public:
// Types
enum
Type
{
NUL
,
NUMBER
,
BOOL
,
STRING
,
ARRAY
,
OBJECT
...
...
include/LightGBM/tree.h
View file @
3c999be3
...
...
@@ -331,7 +331,7 @@ class Tree {
PathElement
(
int
i
,
double
z
,
double
o
,
double
w
)
:
feature_index
(
i
),
zero_fraction
(
z
),
one_fraction
(
o
),
pweight
(
w
)
{}
};
/*! \brief Polynomial time algorithm for SHAP values (
https://
ar
x
iv
.org/abs/
1706.06060)*/
/*! \brief Polynomial time algorithm for SHAP values (ar
X
iv
:
1706.06060)*/
void
TreeSHAP
(
const
double
*
feature_values
,
double
*
phi
,
int
node
,
int
unique_depth
,
PathElement
*
parent_unique_path
,
double
parent_zero_fraction
,
...
...
include/LightGBM/utils/common.h
View file @
3c999be3
...
...
@@ -341,8 +341,7 @@ inline static void Uint32ToStr(uint32_t value, char* buffer) {
if
(
value
<
10
)
{
*--
buffer
=
char
(
value
)
+
'0'
;
}
else
{
}
else
{
const
unsigned
i
=
value
<<
1
;
*--
buffer
=
kDigitsLut
[
i
+
1
];
*--
buffer
=
kDigitsLut
[
i
];
...
...
include/LightGBM/utils/text_reader.h
View file @
3c999be3
...
...
@@ -100,8 +100,7 @@ class TextReader {
last_line_
.
append
(
buffer_process
+
last_i
,
i
-
last_i
);
process_fun
(
total_cnt
,
last_line_
.
c_str
(),
last_line_
.
size
());
last_line_
=
""
;
}
else
{
}
else
{
process_fun
(
total_cnt
,
buffer_process
+
last_i
,
i
-
last_i
);
}
++
cnt
;
...
...
@@ -110,8 +109,7 @@ class TextReader {
// skip end of line
while
((
buffer_process
[
i
]
==
'\n'
||
buffer_process
[
i
]
==
'\r'
)
&&
i
<
read_cnt
)
{
++
i
;
}
last_i
=
i
;
}
else
{
}
else
{
++
i
;
}
}
...
...
@@ -167,8 +165,7 @@ class TextReader {
if
(
cur_sample_cnt
<
sample_cnt
)
{
out_sampled_data
->
emplace_back
(
buffer
,
size
);
++
cur_sample_cnt
;
}
else
{
}
else
{
const
size_t
idx
=
static_cast
<
size_t
>
(
random
.
NextInt
(
0
,
static_cast
<
int
>
(
line_idx
+
1
)));
if
(
idx
<
static_cast
<
size_t
>
(
sample_cnt
))
{
out_sampled_data
->
operator
[](
idx
)
=
std
::
string
(
buffer
,
size
);
...
...
@@ -207,8 +204,7 @@ class TextReader {
if
(
cur_sample_cnt
<
sample_cnt
)
{
out_sampled_data
->
emplace_back
(
buffer
,
size
);
++
cur_sample_cnt
;
}
else
{
}
else
{
const
size_t
idx
=
static_cast
<
size_t
>
(
random
.
NextInt
(
0
,
static_cast
<
int
>
(
out_used_data_indices
->
size
())));
if
(
idx
<
static_cast
<
size_t
>
(
sample_cnt
))
{
out_sampled_data
->
operator
[](
idx
)
=
std
::
string
(
buffer
,
size
);
...
...
@@ -250,8 +246,7 @@ class TextReader {
++
used_cnt
;
}
last_line_
=
""
;
}
else
{
}
else
{
if
(
filter_fun
(
used_cnt
,
total_cnt
))
{
lines_
.
emplace_back
(
buffer_process
+
last_i
,
i
-
last_i
);
++
used_cnt
;
...
...
@@ -263,8 +258,7 @@ class TextReader {
// skip end of line
while
((
buffer_process
[
i
]
==
'\n'
||
buffer_process
[
i
]
==
'\r'
)
&&
i
<
read_cnt
)
{
++
i
;
}
last_i
=
i
;
}
else
{
}
else
{
++
i
;
}
}
...
...
@@ -299,8 +293,7 @@ class TextReader {
[
&
used_data_indices
](
INDEX_T
used_cnt
,
INDEX_T
total_cnt
)
{
if
(
static_cast
<
size_t
>
(
used_cnt
)
<
used_data_indices
.
size
()
&&
total_cnt
==
used_data_indices
[
used_cnt
])
{
return
true
;
}
else
{
}
else
{
return
false
;
}
});
...
...
src/boosting/gbdt_model_text.cpp
View file @
3c999be3
...
...
@@ -355,11 +355,9 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
auto
strs
=
Common
::
Split
(
cur_line
.
c_str
(),
'='
);
if
(
strs
.
size
()
==
1
)
{
key_vals
[
strs
[
0
]]
=
""
;
}
else
if
(
strs
.
size
()
==
2
)
{
}
else
if
(
strs
.
size
()
==
2
)
{
key_vals
[
strs
[
0
]]
=
strs
[
1
];
}
else
if
(
strs
.
size
()
>
2
)
{
}
else
if
(
strs
.
size
()
>
2
)
{
if
(
strs
[
0
]
==
"feature_names"
)
{
key_vals
[
strs
[
0
]]
=
cur_line
.
substr
(
std
::
strlen
(
"feature_names="
));
}
else
{
...
...
@@ -367,8 +365,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
Log
::
Fatal
(
"Wrong line at model file: %s"
,
cur_line
.
substr
(
0
,
std
::
min
<
size_t
>
(
128
,
cur_line
.
size
())).
c_str
());
}
}
}
else
{
}
else
{
break
;
}
}
...
...
@@ -450,8 +447,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
size_t
used_len
=
0
;
models_
.
emplace_back
(
new
Tree
(
p
,
&
used_len
));
p
+=
used_len
;
}
else
{
}
else
{
break
;
}
}
...
...
src/c_api.cpp
View file @
3c999be3
...
...
@@ -713,7 +713,6 @@ int LGBM_DatasetCreateFromCSRFunc(void* get_row_funptr,
const
char
*
parameters
,
const
DatasetHandle
reference
,
DatasetHandle
*
out
)
{
API_BEGIN
();
auto
get_row_fun
=
*
static_cast
<
std
::
function
<
void
(
int
idx
,
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>*>
(
get_row_funptr
);
...
...
@@ -758,7 +757,7 @@ int LGBM_DatasetCreateFromCSRFunc(void* get_row_funptr,
ret
->
CreateValid
(
reinterpret_cast
<
const
Dataset
*>
(
reference
));
}
OMP_INIT_EX
();
std
::
vector
<
std
::
pair
<
int
,
double
>>
threadBuffer
;
#pragma omp parallel for schedule(static) private(threadBuffer)
...
...
@@ -970,7 +969,7 @@ int LGBM_DatasetGetField(DatasetHandle handle,
}
else
if
(
dataset
->
GetDoubleField
(
field_name
,
out_len
,
reinterpret_cast
<
const
double
**>
(
out_ptr
)))
{
*
out_type
=
C_API_DTYPE_FLOAT64
;
is_success
=
true
;
}
else
if
(
dataset
->
GetInt8Field
(
field_name
,
out_len
,
reinterpret_cast
<
const
int8_t
**>
(
out_ptr
))){
}
else
if
(
dataset
->
GetInt8Field
(
field_name
,
out_len
,
reinterpret_cast
<
const
int8_t
**>
(
out_ptr
)))
{
*
out_type
=
C_API_DTYPE_INT8
;
is_success
=
true
;
}
...
...
src/io/dataset.cpp
View file @
3c999be3
...
...
@@ -578,11 +578,10 @@ bool Dataset::GetDoubleField(const char* field_name, data_size_t* out_len, const
if
(
name
==
std
::
string
(
"init_score"
))
{
*
out_ptr
=
metadata_
.
init_score
();
*
out_len
=
static_cast
<
data_size_t
>
(
metadata_
.
num_init_score
());
}
else
if
(
name
==
std
::
string
(
"feature_penalty"
)){
}
else
if
(
name
==
std
::
string
(
"feature_penalty"
))
{
*
out_ptr
=
feature_penalty_
.
data
();
*
out_len
=
static_cast
<
data_size_t
>
(
feature_penalty_
.
size
());
}
else
{
}
else
{
return
false
;
}
return
true
;
...
...
@@ -707,7 +706,7 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
}
}
void
Dataset
::
DumpTextFile
(
const
char
*
text_filename
){
void
Dataset
::
DumpTextFile
(
const
char
*
text_filename
)
{
FILE
*
file
=
NULL
;
#if _MSC_VER
fopen_s
(
&
file
,
text_filename
,
"wt"
);
...
...
@@ -719,36 +718,36 @@ void Dataset::DumpTextFile(const char* text_filename){
fprintf
(
file
,
"num_groups: %d
\n
"
,
num_groups_
);
fprintf
(
file
,
"num_data: %d
\n
"
,
num_data_
);
fprintf
(
file
,
"feature_names: "
);
for
(
auto
n
:
feature_names_
){
for
(
auto
n
:
feature_names_
)
{
fprintf
(
file
,
"%s, "
,
n
.
c_str
());
}
fprintf
(
file
,
"
\n
monotone_constraints: "
);
for
(
auto
i
:
monotone_types_
){
for
(
auto
i
:
monotone_types_
)
{
fprintf
(
file
,
"%d, "
,
i
);
}
fprintf
(
file
,
"
\n
feature_penalty: "
);
for
(
auto
i
:
feature_penalty_
){
for
(
auto
i
:
feature_penalty_
)
{
fprintf
(
file
,
"%lf, "
,
i
);
}
fprintf
(
file
,
"
\n
"
);
for
(
auto
n
:
feature_names_
){
for
(
auto
n
:
feature_names_
)
{
fprintf
(
file
,
"%s, "
,
n
.
c_str
());
}
std
::
vector
<
std
::
unique_ptr
<
BinIterator
>>
iterators
;
iterators
.
reserve
(
num_features_
);
for
(
int
j
=
0
;
j
<
num_features_
;
++
j
){
for
(
int
j
=
0
;
j
<
num_features_
;
++
j
)
{
auto
group_idx
=
feature2group_
[
j
];
auto
sub_idx
=
feature2subfeature_
[
j
];
iterators
.
emplace_back
(
feature_groups_
[
group_idx
]
->
SubFeatureIterator
(
sub_idx
));
}
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
){
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
fprintf
(
file
,
"
\n
"
);
for
(
int
j
=
0
;
j
<
num_total_features_
;
++
j
){
for
(
int
j
=
0
;
j
<
num_total_features_
;
++
j
)
{
auto
inner_feature_idx
=
used_feature_map_
[
j
];
if
(
inner_feature_idx
<
0
){
fprintf
(
file
,
"NA, "
);
if
(
inner_feature_idx
<
0
)
{
fprintf
(
file
,
"NA, "
);
}
else
{
fprintf
(
file
,
"%d, "
,
iterators
[
inner_feature_idx
]
->
RawGet
(
i
));
fprintf
(
file
,
"%d, "
,
iterators
[
inner_feature_idx
]
->
RawGet
(
i
));
}
}
}
...
...
@@ -947,50 +946,50 @@ void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hess
}
template
<
typename
T
>
void
PushVector
(
std
::
vector
<
T
>&
dest
,
const
std
::
vector
<
T
>&
src
){
void
PushVector
(
std
::
vector
<
T
>&
dest
,
const
std
::
vector
<
T
>&
src
)
{
dest
.
reserve
(
dest
.
size
()
+
src
.
size
());
for
(
auto
i
:
src
){
for
(
auto
i
:
src
)
{
dest
.
push_back
(
i
);
}
}
template
<
typename
T
>
void
PushOffset
(
std
::
vector
<
T
>&
dest
,
const
std
::
vector
<
T
>&
src
,
const
T
&
offset
){
void
PushOffset
(
std
::
vector
<
T
>&
dest
,
const
std
::
vector
<
T
>&
src
,
const
T
&
offset
)
{
dest
.
reserve
(
dest
.
size
()
+
src
.
size
());
for
(
auto
i
:
src
){
for
(
auto
i
:
src
)
{
dest
.
push_back
(
i
+
offset
);
}
}
template
<
typename
T
>
void
PushClearIfEmpty
(
std
::
vector
<
T
>&
dest
,
const
size_t
dest_len
,
const
std
::
vector
<
T
>&
src
,
const
size_t
src_len
,
const
T
&
deflt
){
if
(
!
dest
.
empty
()
&&
!
src
.
empty
()){
void
PushClearIfEmpty
(
std
::
vector
<
T
>&
dest
,
const
size_t
dest_len
,
const
std
::
vector
<
T
>&
src
,
const
size_t
src_len
,
const
T
&
deflt
)
{
if
(
!
dest
.
empty
()
&&
!
src
.
empty
())
{
PushVector
(
dest
,
src
);
}
else
if
(
!
dest
.
empty
()
&&
src
.
empty
()){
for
(
size_t
i
=
0
;
i
<
src_len
;
++
i
){
}
else
if
(
!
dest
.
empty
()
&&
src
.
empty
())
{
for
(
size_t
i
=
0
;
i
<
src_len
;
++
i
)
{
dest
.
push_back
(
deflt
);
}
}
else
if
(
dest
.
empty
()
&&
!
src
.
empty
()){
for
(
size_t
i
=
0
;
i
<
dest_len
;
++
i
){
}
else
if
(
dest
.
empty
()
&&
!
src
.
empty
())
{
for
(
size_t
i
=
0
;
i
<
dest_len
;
++
i
)
{
dest
.
push_back
(
deflt
);
}
PushVector
(
dest
,
src
);
}
}
void
Dataset
::
addFeaturesFrom
(
Dataset
*
other
){
if
(
other
->
num_data_
!=
num_data_
){
void
Dataset
::
addFeaturesFrom
(
Dataset
*
other
)
{
if
(
other
->
num_data_
!=
num_data_
)
{
throw
std
::
runtime_error
(
"Cannot add features from other Dataset with a different number of rows"
);
}
PushVector
(
feature_names_
,
other
->
feature_names_
);
PushVector
(
feature2subfeature_
,
other
->
feature2subfeature_
);
PushVector
(
group_feature_cnt_
,
other
->
group_feature_cnt_
);
feature_groups_
.
reserve
(
other
->
feature_groups_
.
size
());
for
(
auto
&
fg
:
other
->
feature_groups_
){
for
(
auto
&
fg
:
other
->
feature_groups_
)
{
feature_groups_
.
emplace_back
(
new
FeatureGroup
(
*
fg
));
}
for
(
auto
feature_idx
:
other
->
used_feature_map_
){
if
(
feature_idx
>=
0
){
for
(
auto
feature_idx
:
other
->
used_feature_map_
)
{
if
(
feature_idx
>=
0
)
{
used_feature_map_
.
push_back
(
feature_idx
+
num_features_
);
}
else
{
used_feature_map_
.
push_back
(
-
1
);
// Unused feature.
...
...
@@ -1000,14 +999,14 @@ void Dataset::addFeaturesFrom(Dataset* other){
PushOffset
(
feature2group_
,
other
->
feature2group_
,
num_groups_
);
auto
bin_offset
=
group_bin_boundaries_
.
back
();
// Skip the leading 0 when copying group_bin_boundaries.
for
(
auto
i
=
other
->
group_bin_boundaries_
.
begin
()
+
1
;
i
<
other
->
group_bin_boundaries_
.
end
();
++
i
){
for
(
auto
i
=
other
->
group_bin_boundaries_
.
begin
()
+
1
;
i
<
other
->
group_bin_boundaries_
.
end
();
++
i
)
{
group_bin_boundaries_
.
push_back
(
*
i
+
bin_offset
);
}
PushOffset
(
group_feature_start_
,
other
->
group_feature_start_
,
num_features_
);
PushClearIfEmpty
(
monotone_types_
,
num_total_features_
,
other
->
monotone_types_
,
other
->
num_total_features_
,
(
int8_t
)
0
);
PushClearIfEmpty
(
feature_penalty_
,
num_total_features_
,
other
->
feature_penalty_
,
other
->
num_total_features_
,
1.0
);
num_features_
+=
other
->
num_features_
;
num_total_features_
+=
other
->
num_total_features_
;
num_groups_
+=
other
->
num_groups_
;
...
...
src/io/dataset_loader.cpp
View file @
3c999be3
...
...
@@ -390,8 +390,7 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
dataset
->
monotone_types_
[
inner_fidx
]
=
config_
.
monotone_constraints
[
i
];
}
}
}
else
{
}
else
{
const
int8_t
*
tmp_ptr_monotone_type
=
reinterpret_cast
<
const
int8_t
*>
(
mem_ptr
);
dataset
->
monotone_types_
.
clear
();
for
(
int
i
=
0
;
i
<
dataset
->
num_features_
;
++
i
)
{
...
...
@@ -413,8 +412,7 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
dataset
->
feature_penalty_
[
inner_fidx
]
=
config_
.
feature_contri
[
i
];
}
}
}
else
{
}
else
{
const
double
*
tmp_ptr_feature_penalty
=
reinterpret_cast
<
const
double
*>
(
mem_ptr
);
dataset
->
feature_penalty_
.
clear
();
for
(
int
i
=
0
;
i
<
dataset
->
num_features_
;
++
i
)
{
...
...
src/io/dense_bin.hpp
View file @
3c999be3
...
...
@@ -322,7 +322,7 @@ class DenseBin: public Bin {
};
template
<
typename
VAL_T
>
DenseBin
<
VAL_T
>*
DenseBin
<
VAL_T
>::
Clone
(){
DenseBin
<
VAL_T
>*
DenseBin
<
VAL_T
>::
Clone
()
{
return
new
DenseBin
<
VAL_T
>
(
*
this
);
}
...
...
src/io/dense_nbits_bin.hpp
View file @
3c999be3
...
...
@@ -369,8 +369,8 @@ class Dense4bitsBin : public Bin {
protected:
Dense4bitsBin
(
const
Dense4bitsBin
&
other
)
:
num_data_
(
other
.
num_data_
),
data_
(
other
.
data_
),
buf_
(
other
.
buf_
){}
:
num_data_
(
other
.
num_data_
),
data_
(
other
.
data_
),
buf_
(
other
.
buf_
)
{}
data_size_t
num_data_
;
std
::
vector
<
uint8_t
>
data_
;
std
::
vector
<
uint8_t
>
buf_
;
...
...
src/io/json11.cpp
View file @
3c999be3
...
...
@@ -290,8 +290,10 @@ const Json & JsonObject::operator[] (const string &key) const {
return
(
iter
==
m_value
.
end
())
?
static_null
()
:
iter
->
second
;
}
const
Json
&
JsonArray
::
operator
[]
(
size_t
i
)
const
{
if
(
i
>=
m_value
.
size
())
return
static_null
();
else
return
m_value
[
i
];
if
(
i
>=
m_value
.
size
())
return
static_null
();
else
return
m_value
[
i
];
}
/* * * * * * * * * * * * * * * * * * * *
...
...
@@ -387,15 +389,14 @@ struct JsonParser final {
i
++
;
if
(
i
==
str
.
size
())
return
fail
(
"Unexpected end of input after start of comment"
,
false
);
if
(
str
[
i
]
==
'/'
)
{
// inline comment
if
(
str
[
i
]
==
'/'
)
{
// inline comment
i
++
;
// advance until next line, or end of input
while
(
i
<
str
.
size
()
&&
str
[
i
]
!=
'\n'
)
{
i
++
;
}
comment_found
=
true
;
}
else
if
(
str
[
i
]
==
'*'
)
{
// multiline comment
}
else
if
(
str
[
i
]
==
'*'
)
{
// multiline comment
i
++
;
if
(
i
>
str
.
size
()
-
2
)
return
fail
(
"Unexpected end of input inside multi-line comment"
,
false
);
...
...
@@ -407,9 +408,9 @@ struct JsonParser final {
}
i
+=
2
;
comment_found
=
true
;
}
else
}
else
{
return
fail
(
"Malformed comment"
,
false
);
}
}
return
comment_found
;
}
...
...
src/io/parser.cpp
View file @
3c999be3
...
...
@@ -150,12 +150,10 @@ Parser* Parser::CreateParser(const char* filename, bool header, int num_features
if
(
type
==
DataType
::
LIBSVM
)
{
label_idx
=
GetLabelIdxForLibsvm
(
line1
,
num_features
,
label_idx
);
ret
.
reset
(
new
LibSVMParser
(
label_idx
));
}
else
if
(
type
==
DataType
::
TSV
)
{
}
else
if
(
type
==
DataType
::
TSV
)
{
label_idx
=
GetLabelIdxForTSV
(
line1
,
num_features
,
label_idx
);
ret
.
reset
(
new
TSVParser
(
label_idx
,
tab_cnt
+
1
));
}
else
if
(
type
==
DataType
::
CSV
)
{
}
else
if
(
type
==
DataType
::
CSV
)
{
label_idx
=
GetLabelIdxForCSV
(
line1
,
num_features
,
label_idx
);
ret
.
reset
(
new
CSVParser
(
label_idx
,
comma_cnt
+
1
));
}
...
...
src/io/parser.hpp
View file @
3c999be3
...
...
@@ -28,8 +28,7 @@ class CSVParser: public Parser {
if
(
idx
==
label_idx_
)
{
*
out_label
=
val
;
bias
=
-
1
;
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
out_features
->
emplace_back
(
idx
+
bias
,
val
);
}
++
idx
;
...
...
src/io/sparse_bin.hpp
View file @
3c999be3
...
...
@@ -413,7 +413,7 @@ class SparseBin: public Bin {
SparseBin
<
VAL_T
>
(
const
SparseBin
<
VAL_T
>&
other
)
:
num_data_
(
other
.
num_data_
),
deltas_
(
other
.
deltas_
),
vals_
(
other
.
vals_
),
num_vals_
(
other
.
num_vals_
),
push_buffers_
(
other
.
push_buffers_
),
fast_index_
(
other
.
fast_index_
),
fast_index_shift_
(
other
.
fast_index_shift_
){}
fast_index_
(
other
.
fast_index_
),
fast_index_shift_
(
other
.
fast_index_shift_
)
{}
data_size_t
num_data_
;
std
::
vector
<
uint8_t
>
deltas_
;
...
...
@@ -425,7 +425,7 @@ class SparseBin: public Bin {
};
template
<
typename
VAL_T
>
SparseBin
<
VAL_T
>*
SparseBin
<
VAL_T
>::
Clone
(){
SparseBin
<
VAL_T
>*
SparseBin
<
VAL_T
>::
Clone
()
{
return
new
SparseBin
(
*
this
);
}
...
...
src/objective/regression_objective.hpp
View file @
3c999be3
...
...
@@ -58,7 +58,7 @@ namespace LightGBM {
CHECK(threshold < weighted_cdf[pos]);\
T v1 = data_reader(sorted_idx[pos - 1]);\
T v2 = data_reader(sorted_idx[pos]);\
if(weighted_cdf[pos + 1] - weighted_cdf[pos] > kEpsilon){\
if
(weighted_cdf[pos + 1] - weighted_cdf[pos] > kEpsilon)
{\
return static_cast<T>((threshold - weighted_cdf[pos]) / (weighted_cdf[pos + 1] - weighted_cdf[pos]) * (v2 - v1) + v1); \
} else {\
return static_cast<T>(v2);\
...
...
src/objective/xentropy_objective.hpp
View file @
3c999be3
...
...
@@ -251,7 +251,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
return
initscore
;
}
private:
private:
/*! \brief Number of data points */
data_size_t
num_data_
;
/*! \brief Pointer for label */
...
...
src/treelearner/gpu_tree_learner.cpp
View file @
3c999be3
...
...
@@ -150,8 +150,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
if
(
use_all_features
)
{
histogram_allfeats_kernels_
[
exp_workgroups_per_feature
].
set_arg
(
4
,
leaf_num_data
);
}
else
{
}
else
{
histogram_kernels_
[
exp_workgroups_per_feature
].
set_arg
(
4
,
leaf_num_data
);
}
// for the root node, indices are not copied
...
...
@@ -169,13 +168,11 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
// the queue should be asynchronous, and we will call WaitAndGetHistograms() before we start processing dense feature groups
if
(
leaf_num_data
==
num_data_
)
{
kernel_wait_obj_
=
boost
::
compute
::
wait_list
(
queue_
.
enqueue_1d_range_kernel
(
histogram_fulldata_kernels_
[
exp_workgroups_per_feature
],
0
,
num_workgroups
*
256
,
256
));
}
else
{
}
else
{
if
(
use_all_features
)
{
kernel_wait_obj_
=
boost
::
compute
::
wait_list
(
queue_
.
enqueue_1d_range_kernel
(
histogram_allfeats_kernels_
[
exp_workgroups_per_feature
],
0
,
num_workgroups
*
256
,
256
));
}
else
{
}
else
{
kernel_wait_obj_
=
boost
::
compute
::
wait_list
(
queue_
.
enqueue_1d_range_kernel
(
histogram_kernels_
[
exp_workgroups_per_feature
],
0
,
num_workgroups
*
256
,
256
));
}
...
...
@@ -208,8 +205,7 @@ void GPUTreeLearner::WaitAndGetHistograms(HistogramBinEntry* histograms) {
old_histogram_array
[
j
].
sum_hessians
=
hist_outputs
[
i
*
device_bin_size_
+
j
].
sum_hessians
;
old_histogram_array
[
j
].
cnt
=
(
data_size_t
)
hist_outputs
[
i
*
device_bin_size_
+
j
].
cnt
;
}
}
else
{
}
else
{
// values of this feature have been redistributed to multiple bins; need a reduction here
int
ind
=
0
;
for
(
int
j
=
0
;
j
<
bin_size
;
++
j
)
{
...
...
@@ -318,7 +314,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
num_dense_feature4_
,
ctx_
));
boost
::
compute
::
fill
(
sync_counters_
->
begin
(),
sync_counters_
->
end
(),
0
,
queue_
);
// The output buffer is allocated to host directly, to overlap compute and data transfer
device_histogram_outputs_
=
boost
::
compute
::
buffer
();
// deallocate
device_histogram_outputs_
=
boost
::
compute
::
buffer
();
// deallocate
device_histogram_outputs_
=
boost
::
compute
::
buffer
(
ctx_
,
num_dense_feature4_
*
dword_features_
*
device_bin_size_
*
hist_bin_entry_sz_
,
boost
::
compute
::
memory_object
::
write_only
|
boost
::
compute
::
memory_object
::
alloc_host_ptr
,
nullptr
);
// find the dense feature-groups and group them into the Feature4 data structure (several feature-groups packed into 4 bytes)
...
...
@@ -337,8 +333,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
printf
(
"feature-group %d using multiplier %d
\n
"
,
i
,
device_bin_mults_
.
back
());
#endif
k
++
;
}
else
{
}
else
{
sparse_feature_group_map_
.
push_back
(
i
);
}
// found
...
...
@@ -410,8 +405,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
host4
[
j
].
s
[
3
]
=
(
uint8_t
)((
iters
[
6
].
RawGet
(
j
)
*
dev_bin_mult
[
6
]
+
((
j
+
6
)
&
(
dev_bin_mult
[
6
]
-
1
)))
|
((
iters
[
7
].
RawGet
(
j
)
*
dev_bin_mult
[
7
]
+
((
j
+
7
)
&
(
dev_bin_mult
[
7
]
-
1
)))
<<
4
));
}
}
else
if
(
dword_features_
==
4
)
{
}
else
if
(
dword_features_
==
4
)
{
// one feature datapoint is one byte
for
(
int
s_idx
=
0
;
s_idx
<
4
;
++
s_idx
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
...
...
@@ -422,20 +416,17 @@ void GPUTreeLearner::AllocateGPUMemory() {
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
}
else
if
(
dynamic_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
)
!=
0
)
{
// Dense 4-bit bin
Dense4bitsBinIterator
iter
=
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
);
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
}
else
{
}
else
{
Log
::
Fatal
(
"Bug in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
}
}
}
else
{
}
else
{
Log
::
Fatal
(
"Bug in GPU tree builder: dword_features_ can only be 4 or 8"
);
}
queue_
.
enqueue_write_buffer
(
device_features_
->
get_buffer
(),
...
...
@@ -469,12 +460,10 @@ void GPUTreeLearner::AllocateGPUMemory() {
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)))
<<
((
i
&
1
)
<<
2
));
}
}
else
{
}
else
{
Log
::
Fatal
(
"GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not"
,
dense_dword_ind
[
i
]);
}
}
else
if
(
dword_features_
==
4
)
{
}
else
if
(
dword_features_
==
4
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
...
...
@@ -483,20 +472,17 @@ void GPUTreeLearner::AllocateGPUMemory() {
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
else
if
(
dynamic_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
)
!=
0
)
{
Dense4bitsBinIterator
iter
=
*
static_cast
<
Dense4bitsBinIterator
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
else
{
}
else
{
Log
::
Fatal
(
"BUG in GPU tree builder: only DenseBin and Dense4bitsBin are supported"
);
}
}
else
{
}
else
{
Log
::
Fatal
(
"Bug in GPU tree builder: dword_features_ can only be 4 or 8"
);
}
}
...
...
@@ -509,8 +495,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
j
&
0xf
)
<<
((
i
&
1
)
<<
2
));
}
}
}
else
if
(
dword_features_
==
4
)
{
}
else
if
(
dword_features_
==
4
)
{
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
i
=
k
;
i
<
dword_features_
;
++
i
)
{
...
...
@@ -671,8 +656,7 @@ void GPUTreeLearner::SetupKernelArguments() {
histogram_fulldata_kernels_
[
i
].
set_args
(
*
device_features_
,
device_feature_masks_
,
num_data_
,
*
device_data_indices_
,
num_data_
,
device_gradients_
,
0.0
f
,
*
device_subhistograms_
,
*
sync_counters_
,
device_histogram_outputs_
);
}
else
{
}
else
{
histogram_kernels_
[
i
].
set_args
(
*
device_features_
,
device_feature_masks_
,
num_data_
,
*
device_data_indices_
,
num_data_
,
device_gradients_
,
device_hessians_
,
*
device_subhistograms_
,
*
sync_counters_
,
device_histogram_outputs_
);
...
...
@@ -719,20 +703,17 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
kernel_name_
=
"histogram16"
;
device_bin_size_
=
16
;
dword_features_
=
8
;
}
else
if
(
max_num_bin_
<=
64
)
{
}
else
if
(
max_num_bin_
<=
64
)
{
kernel_source_
=
kernel64_src_
;
kernel_name_
=
"histogram64"
;
device_bin_size_
=
64
;
dword_features_
=
4
;
}
else
if
(
max_num_bin_
<=
256
)
{
}
else
if
(
max_num_bin_
<=
256
)
{
kernel_source_
=
kernel256_src_
;
kernel_name_
=
"histogram256"
;
device_bin_size_
=
256
;
dword_features_
=
4
;
}
else
{
}
else
{
Log
::
Fatal
(
"bin size %d cannot run on GPU"
,
max_num_bin_
);
}
if
(
max_num_bin_
==
65
)
{
...
...
@@ -781,8 +762,7 @@ void GPUTreeLearner::BeforeTrain() {
if
(
!
use_bagging_
&&
num_dense_feature_groups_
)
{
if
(
!
is_constant_hessian_
)
{
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
num_data_
*
sizeof
(
score_t
),
hessians_
);
}
else
{
}
else
{
// setup hessian parameters only
score_t
const_hessian
=
hessians_
[
0
];
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
...
...
@@ -815,8 +795,7 @@ void GPUTreeLearner::BeforeTrain() {
}
// transfer hessian to GPU
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
cnt
*
sizeof
(
score_t
),
ordered_hessians_
.
data
());
}
else
{
}
else
{
// setup hessian parameters only
score_t
const_hessian
=
hessians_
[
indices
[
0
]];
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
...
...
@@ -911,8 +890,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
ordered_gradients
[
i
]
=
gradients
[
data_indices
[
i
]];
}
gradients_future_
=
queue_
.
enqueue_write_buffer_async
(
device_gradients_
,
0
,
num_data
*
sizeof
(
score_t
),
ptr_pinned_gradients_
);
}
else
{
}
else
{
gradients_future_
=
queue_
.
enqueue_write_buffer_async
(
device_gradients_
,
0
,
num_data
*
sizeof
(
score_t
),
gradients
);
}
}
...
...
@@ -924,8 +902,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
ordered_hessians
[
i
]
=
hessians
[
data_indices
[
i
]];
}
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
num_data
*
sizeof
(
score_t
),
ptr_pinned_hessians_
);
}
else
{
}
else
{
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
num_data
*
sizeof
(
score_t
),
hessians
);
}
}
...
...
@@ -944,8 +921,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
if
(
is_feature_group_used
[
dense_feature_group_map_
[
i
]])
{
feature_masks_
[
i
]
=
1
;
++
used_dense_feature_groups
;
}
else
{
}
else
{
feature_masks_
[
i
]
=
0
;
}
}
...
...
@@ -981,8 +957,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
if
(
!
is_feature_used
[
feature_index
])
continue
;
if
(
ordered_bins_
[
train_data_
->
Feature2Group
(
feature_index
)])
{
is_sparse_feature_used
[
feature_index
]
=
1
;
}
else
{
}
else
{
is_dense_feature_used
[
feature_index
]
=
1
;
}
}
...
...
@@ -1006,8 +981,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
if
(
config_
->
gpu_use_dp
)
{
// use double precision
WaitAndGetHistograms
<
HistogramBinEntry
>
(
ptr_smaller_leaf_hist_data
);
}
else
{
}
else
{
// use single precision
WaitAndGetHistograms
<
GPUHistogramBinEntry
>
(
ptr_smaller_leaf_hist_data
);
}
...
...
@@ -1060,8 +1034,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
if
(
config_
->
gpu_use_dp
)
{
// use double precision
WaitAndGetHistograms
<
HistogramBinEntry
>
(
ptr_larger_leaf_hist_data
);
}
else
{
}
else
{
// use single precision
WaitAndGetHistograms
<
GPUHistogramBinEntry
>
(
ptr_larger_leaf_hist_data
);
}
...
...
src/treelearner/tree_learner.cpp
View file @
3c999be3
...
...
@@ -17,8 +17,7 @@ TreeLearner* TreeLearner::CreateTreeLearner(const std::string& learner_type, con
}
else
if
(
learner_type
==
std
::
string
(
"voting"
))
{
return
new
VotingParallelTreeLearner
<
SerialTreeLearner
>
(
config
);
}
}
else
if
(
device_type
==
std
::
string
(
"gpu"
))
{
}
else
if
(
device_type
==
std
::
string
(
"gpu"
))
{
if
(
learner_type
==
std
::
string
(
"serial"
))
{
return
new
GPUTreeLearner
(
config
);
}
else
if
(
learner_type
==
std
::
string
(
"feature"
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment