Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
45adbf89
Commit
45adbf89
authored
Apr 17, 2018
by
Guolin Ke
Browse files
bug fixed for num_total_features.
parent
cf475dbb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
8 deletions
+9
-8
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+9
-8
No files found.
src/io/dataset_loader.cpp
View file @
45adbf89
...
@@ -788,7 +788,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
...
@@ -788,7 +788,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
}
}
}
}
dataset
->
set_feature_names
(
feature_names_
);
dataset
->
set_feature_names
(
feature_names_
);
std
::
vector
<
std
::
unique_ptr
<
BinMapper
>>
bin_mappers
(
sample_values
.
size
()
);
std
::
vector
<
std
::
unique_ptr
<
BinMapper
>>
bin_mappers
(
dataset
->
num_total_features_
);
const
data_size_t
filter_cnt
=
static_cast
<
data_size_t
>
(
const
data_size_t
filter_cnt
=
static_cast
<
data_size_t
>
(
static_cast
<
double
>
(
io_config_
.
min_data_in_leaf
*
sample_data
.
size
())
/
dataset
->
num_data_
);
static_cast
<
double
>
(
io_config_
.
min_data_in_leaf
*
sample_data
.
size
())
/
dataset
->
num_data_
);
...
@@ -817,21 +817,22 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
...
@@ -817,21 +817,22 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// if have multi-machines, need to find bin distributed
// if have multi-machines, need to find bin distributed
// different machines will find bin for different features
// different machines will find bin for different features
int
total_num_feature
=
static_cast
<
int
>
(
sample_values
.
size
());
int
num_total_features
=
dataset
->
num_total_features_
;
total_num_feature
=
Network
::
GlobalSyncUpByMin
(
total_num_feature
);
num_total_features
=
Network
::
GlobalSyncUpByMin
(
num_total_features
);
dataset
->
num_total_features_
=
num_total_features
;
// start and len will store the process feature indices for different machines
// start and len will store the process feature indices for different machines
// machine i will find bins for features in [ start[i], start[i] + len[i] )
// machine i will find bins for features in [ start[i], start[i] + len[i] )
std
::
vector
<
int
>
start
(
num_machines
);
std
::
vector
<
int
>
start
(
num_machines
);
std
::
vector
<
int
>
len
(
num_machines
);
std
::
vector
<
int
>
len
(
num_machines
);
int
step
=
(
total_
num_
feature
+
num_machines
-
1
)
/
num_machines
;
int
step
=
(
num_
total_feature
s
+
num_machines
-
1
)
/
num_machines
;
if
(
step
<
1
)
{
step
=
1
;
}
if
(
step
<
1
)
{
step
=
1
;
}
start
[
0
]
=
0
;
start
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
num_machines
-
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_machines
-
1
;
++
i
)
{
len
[
i
]
=
std
::
min
(
step
,
total_
num_
feature
-
start
[
i
]);
len
[
i
]
=
std
::
min
(
step
,
num_
total_feature
s
-
start
[
i
]);
start
[
i
+
1
]
=
start
[
i
]
+
len
[
i
];
start
[
i
+
1
]
=
start
[
i
]
+
len
[
i
];
}
}
len
[
num_machines
-
1
]
=
total_
num_
feature
-
start
[
num_machines
-
1
];
len
[
num_machines
-
1
]
=
num_
total_feature
s
-
start
[
num_machines
-
1
];
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
len
[
rank
];
++
i
)
{
for
(
int
i
=
0
;
i
<
len
[
rank
];
++
i
)
{
...
@@ -859,7 +860,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
...
@@ -859,7 +860,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// get size of bin mapper with max_bin size
// get size of bin mapper with max_bin size
int
type_size
=
BinMapper
::
SizeForSpecificBin
(
max_bin
);
int
type_size
=
BinMapper
::
SizeForSpecificBin
(
max_bin
);
// since sizes of different feature may not be same, we expand all bin mapper to type_size
// since sizes of different feature may not be same, we expand all bin mapper to type_size
comm_size_t
buffer_size
=
type_size
*
total_
num_
feature
;
comm_size_t
buffer_size
=
type_size
*
num_
total_feature
s
;
auto
input_buffer
=
std
::
vector
<
char
>
(
buffer_size
);
auto
input_buffer
=
std
::
vector
<
char
>
(
buffer_size
);
auto
output_buffer
=
std
::
vector
<
char
>
(
buffer_size
);
auto
output_buffer
=
std
::
vector
<
char
>
(
buffer_size
);
...
@@ -886,7 +887,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
...
@@ -886,7 +887,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// gather global feature bin mappers
// gather global feature bin mappers
Network
::
Allgather
(
input_buffer
.
data
(),
size_start
.
data
(),
size_len
.
data
(),
output_buffer
.
data
(),
buffer_size
);
Network
::
Allgather
(
input_buffer
.
data
(),
size_start
.
data
(),
size_len
.
data
(),
output_buffer
.
data
(),
buffer_size
);
// restore features bins from buffer
// restore features bins from buffer
for
(
int
i
=
0
;
i
<
total_
num_
feature
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_
total_feature
s
;
++
i
)
{
if
(
ignore_features_
.
count
(
i
)
>
0
)
{
if
(
ignore_features_
.
count
(
i
)
>
0
)
{
bin_mappers
[
i
]
=
nullptr
;
bin_mappers
[
i
]
=
nullptr
;
continue
;
continue
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment