Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
9b61166f
Unverified
Commit
9b61166f
authored
Oct 01, 2019
by
Nikita Titov
Committed by
GitHub
Oct 01, 2019
Browse files
fixed cpplint errors about spaces and newlines (#2481)
parent
6036e07d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
33 additions
and
21 deletions
+33
-21
src/io/bin.cpp
src/io/bin.cpp
+13
-8
src/io/dataset.cpp
src/io/dataset.cpp
+1
-1
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+17
-10
src/network/linkers_socket.cpp
src/network/linkers_socket.cpp
+1
-1
src/treelearner/cost_effective_gradient_boosting.hpp
src/treelearner/cost_effective_gradient_boosting.hpp
+1
-1
No files found.
src/io/bin.cpp
View file @
9b61166f
...
...
@@ -71,8 +71,9 @@ namespace LightGBM {
return
true
;
}
std
::
vector
<
double
>
GreedyFindBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_cnt
,
int
min_data_in_bin
)
{
std
::
vector
<
double
>
GreedyFindBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_cnt
,
int
min_data_in_bin
)
{
std
::
vector
<
double
>
bin_upper_bound
;
CHECK
(
max_bin
>
0
);
if
(
num_distinct_values
<=
max_bin
)
{
...
...
@@ -150,7 +151,9 @@ namespace LightGBM {
}
std
::
vector
<
double
>
FindBinWithPredefinedBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
int
num_distinct_values
,
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
std
::
vector
<
double
>
bin_upper_bound
;
// get list of distinct values
...
...
@@ -246,8 +249,8 @@ namespace LightGBM {
return
bin_upper_bound
;
}
std
::
vector
<
double
>
FindBinWithZeroAsOneBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
)
{
std
::
vector
<
double
>
FindBinWithZeroAsOneBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
)
{
std
::
vector
<
double
>
bin_upper_bound
;
int
left_cnt_data
=
0
;
int
cnt_zero
=
0
;
...
...
@@ -305,7 +308,8 @@ namespace LightGBM {
}
std
::
vector
<
double
>
FindBinWithZeroAsOneBin
(
const
double
*
distinct_values
,
const
int
*
counts
,
int
num_distinct_values
,
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
int
max_bin
,
size_t
total_sample_cnt
,
int
min_data_in_bin
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
if
(
forced_upper_bounds
.
empty
())
{
return
FindBinWithZeroAsOneBin
(
distinct_values
,
counts
,
num_distinct_values
,
max_bin
,
total_sample_cnt
,
min_data_in_bin
);
}
else
{
...
...
@@ -315,8 +319,9 @@ namespace LightGBM {
}
void
BinMapper
::
FindBin
(
double
*
values
,
int
num_sample_values
,
size_t
total_sample_cnt
,
int
max_bin
,
int
min_data_in_bin
,
int
min_split_data
,
BinType
bin_type
,
bool
use_missing
,
bool
zero_as_missing
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
int
max_bin
,
int
min_data_in_bin
,
int
min_split_data
,
BinType
bin_type
,
bool
use_missing
,
bool
zero_as_missing
,
const
std
::
vector
<
double
>&
forced_upper_bounds
)
{
int
na_cnt
=
0
;
int
tmp_num_sample_values
=
0
;
for
(
int
i
=
0
;
i
<
num_sample_values
;
++
i
)
{
...
...
src/io/dataset.cpp
View file @
9b61166f
...
...
@@ -721,7 +721,7 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
for
(
int
i
=
0
;
i
<
num_total_features_
;
++
i
)
{
int
num_bounds
=
static_cast
<
int
>
(
forced_bin_bounds_
[
i
].
size
());
writer
->
Write
(
&
num_bounds
,
sizeof
(
int
));
for
(
size_t
j
=
0
;
j
<
forced_bin_bounds_
[
i
].
size
();
++
j
)
{
writer
->
Write
(
&
forced_bin_bounds_
[
i
][
j
],
sizeof
(
double
));
}
...
...
src/io/dataset_loader.cpp
View file @
9b61166f
...
...
@@ -4,7 +4,6 @@
*/
#include <LightGBM/dataset_loader.h>
#include <LightGBM/json11.hpp>
#include <LightGBM/network.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/log.h>
...
...
@@ -12,6 +11,8 @@
#include <fstream>
#include <LightGBM/json11.hpp>
using
namespace
json11
;
namespace
LightGBM
{
...
...
@@ -270,7 +271,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
return
dataset
.
release
();
}
Dataset
*
DatasetLoader
::
LoadFromBinFile
(
const
char
*
data_filename
,
const
char
*
bin_filename
,
int
rank
,
int
num_machines
,
int
*
num_global_data
,
std
::
vector
<
data_size_t
>*
used_data_indices
)
{
Dataset
*
DatasetLoader
::
LoadFromBinFile
(
const
char
*
data_filename
,
const
char
*
bin_filename
,
int
rank
,
int
num_machines
,
int
*
num_global_data
,
std
::
vector
<
data_size_t
>*
used_data_indices
)
{
auto
dataset
=
std
::
unique_ptr
<
Dataset
>
(
new
Dataset
());
auto
reader
=
VirtualFileReader
::
Make
(
bin_filename
);
dataset
->
data_filename_
=
data_filename
;
...
...
@@ -470,13 +473,11 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
mem_ptr
+=
sizeof
(
int
);
dataset
->
forced_bin_bounds_
[
i
]
=
std
::
vector
<
double
>
();
const
double
*
tmp_ptr_forced_bounds
=
reinterpret_cast
<
const
double
*>
(
mem_ptr
);
for
(
int
j
=
0
;
j
<
num_bounds
;
++
j
)
{
double
bound
=
tmp_ptr_forced_bounds
[
j
];
dataset
->
forced_bin_bounds_
[
i
].
push_back
(
bound
);
}
mem_ptr
+=
num_bounds
*
sizeof
(
double
);
}
// read size of meta data
...
...
@@ -661,7 +662,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
if
(
config_
.
max_bin_by_feature
.
empty
())
{
bin_mappers
[
i
]
->
FindBin
(
sample_values
[
start
[
rank
]
+
i
],
num_per_col
[
start
[
rank
]
+
i
],
total_sample_size
,
config_
.
max_bin
,
config_
.
min_data_in_bin
,
filter_cnt
,
bin_type
,
config_
.
use_missing
,
config_
.
zero_as_missing
,
filter_cnt
,
bin_type
,
config_
.
use_missing
,
config_
.
zero_as_missing
,
forced_bin_bounds
[
i
]);
}
else
{
bin_mappers
[
i
]
->
FindBin
(
sample_values
[
start
[
rank
]
+
i
],
num_per_col
[
start
[
rank
]
+
i
],
...
...
@@ -821,7 +822,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromMemory(const std::vect
return
out
;
}
std
::
vector
<
std
::
string
>
DatasetLoader
::
SampleTextDataFromFile
(
const
char
*
filename
,
const
Metadata
&
metadata
,
int
rank
,
int
num_machines
,
int
*
num_global_data
,
std
::
vector
<
data_size_t
>*
used_data_indices
)
{
std
::
vector
<
std
::
string
>
DatasetLoader
::
SampleTextDataFromFile
(
const
char
*
filename
,
const
Metadata
&
metadata
,
int
rank
,
int
num_machines
,
int
*
num_global_data
,
std
::
vector
<
data_size_t
>*
used_data_indices
)
{
const
data_size_t
sample_cnt
=
static_cast
<
data_size_t
>
(
config_
.
bin_construct_sample_cnt
);
TextReader
<
data_size_t
>
text_reader
(
filename
,
config_
.
header
);
std
::
vector
<
std
::
string
>
out_data
;
...
...
@@ -867,7 +870,9 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromFile(const char* filen
return
out_data
;
}
void
DatasetLoader
::
ConstructBinMappersFromTextData
(
int
rank
,
int
num_machines
,
const
std
::
vector
<
std
::
string
>&
sample_data
,
const
Parser
*
parser
,
Dataset
*
dataset
)
{
void
DatasetLoader
::
ConstructBinMappersFromTextData
(
int
rank
,
int
num_machines
,
const
std
::
vector
<
std
::
string
>&
sample_data
,
const
Parser
*
parser
,
Dataset
*
dataset
)
{
std
::
vector
<
std
::
vector
<
double
>>
sample_values
;
std
::
vector
<
std
::
vector
<
int
>>
sample_indices
;
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
...
...
@@ -906,7 +911,8 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// get forced split
std
::
string
forced_bins_path
=
config_
.
forcedbins_filename
;
std
::
vector
<
std
::
vector
<
double
>>
forced_bin_bounds
=
DatasetLoader
::
GetForcedBins
(
forced_bins_path
,
dataset
->
num_total_features_
,
std
::
vector
<
std
::
vector
<
double
>>
forced_bin_bounds
=
DatasetLoader
::
GetForcedBins
(
forced_bins_path
,
dataset
->
num_total_features_
,
categorical_features_
);
// check the range of label_idx, weight_idx and group_idx
...
...
@@ -993,7 +999,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
bin_mappers
[
i
]
->
FindBin
(
sample_values
[
start
[
rank
]
+
i
].
data
(),
static_cast
<
int
>
(
sample_values
[
start
[
rank
]
+
i
].
size
()),
sample_data
.
size
(),
config_
.
max_bin
,
config_
.
min_data_in_bin
,
filter_cnt
,
bin_type
,
config_
.
use_missing
,
config_
.
zero_as_missing
,
filter_cnt
,
bin_type
,
config_
.
use_missing
,
config_
.
zero_as_missing
,
forced_bin_bounds
[
i
]);
}
else
{
bin_mappers
[
i
]
->
FindBin
(
sample_values
[
start
[
rank
]
+
i
].
data
(),
...
...
@@ -1149,7 +1155,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
}
/*! \brief Extract local features from file */
void
DatasetLoader
::
ExtractFeaturesFromFile
(
const
char
*
filename
,
const
Parser
*
parser
,
const
std
::
vector
<
data_size_t
>&
used_data_indices
,
Dataset
*
dataset
)
{
void
DatasetLoader
::
ExtractFeaturesFromFile
(
const
char
*
filename
,
const
Parser
*
parser
,
const
std
::
vector
<
data_size_t
>&
used_data_indices
,
Dataset
*
dataset
)
{
std
::
vector
<
double
>
init_score
;
if
(
predict_fun_
!=
nullptr
)
{
init_score
=
std
::
vector
<
double
>
(
dataset
->
num_data_
*
num_class_
);
...
...
src/network/linkers_socket.cpp
View file @
9b61166f
...
...
@@ -187,7 +187,7 @@ void Linkers::Construct() {
listener_
->
Listen
(
incoming_cnt
);
std
::
thread
listen_thread
(
&
Linkers
::
ListenThread
,
this
,
incoming_cnt
);
const
int
connect_fail_retry_cnt
=
20
;
const
int
connect_fail_retry_first_delay_interval
=
200
;
// 0.2 s
const
int
connect_fail_retry_first_delay_interval
=
200
;
// 0.2 s
const
float
connect_fail_retry_delay_factor
=
1.3
f
;
// start connect
for
(
auto
it
=
need_connect
.
begin
();
it
!=
need_connect
.
end
();
++
it
)
{
...
...
src/treelearner/cost_effective_gradient_boosting.hpp
View file @
9b61166f
...
...
@@ -36,7 +36,7 @@ class CostEfficientGradientBoosting {
is_feature_used_in_split_
.
clear
();
is_feature_used_in_split_
.
resize
(
train_data
->
num_features
());
if
(
!
tree_learner_
->
config_
->
cegb_penalty_feature_coupled
.
empty
()
if
(
!
tree_learner_
->
config_
->
cegb_penalty_feature_coupled
.
empty
()
&&
tree_learner_
->
config_
->
cegb_penalty_feature_coupled
.
size
()
!=
static_cast
<
size_t
>
(
train_data
->
num_total_features
()))
{
Log
::
Fatal
(
"cegb_penalty_feature_coupled should be the same size as feature number."
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment