Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
36531679
Unverified
Commit
36531679
authored
Jan 25, 2021
by
shiyu1994
Committed by
GitHub
Jan 25, 2021
Browse files
change Dataset::CopySubrow from group wise to column wise (#3720)
parent
66b96368
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
4 deletions
+33
-4
include/LightGBM/feature_group.h
include/LightGBM/feature_group.h
+8
-0
src/io/dataset.cpp
src/io/dataset.cpp
+25
-4
No files found.
include/LightGBM/feature_group.h
View file @
36531679
...
@@ -232,6 +232,14 @@ class FeatureGroup {
...
@@ -232,6 +232,14 @@ class FeatureGroup {
}
}
}
}
/*!
 * \brief Copy a subset of rows for a single column of this group from another group.
 *
 * For a multi-value group, only the bin stored at index fidx in multi_bin_data_
 * is copied. Otherwise the group's single bin_data_ is copied and fidx is not read.
 *
 * \param full_feature Source feature group to copy rows from
 * \param used_indices Row indices forwarded to the underlying bin's CopySubrow
 * \param num_used_indices Number of entries in used_indices
 * \param fidx Index into multi_bin_data_; only used when is_multi_val_ is true
 */
inline void CopySubrowByCol(const FeatureGroup* full_feature,
                            const data_size_t* used_indices,
                            data_size_t num_used_indices,
                            int fidx) {
  if (is_multi_val_) {
    // One bin per sub-feature: copy just the requested column.
    // NOTE(review): fidx is not range-checked here; callers presumably pass a
    // valid sub-feature index for multi-value groups -- confirm against caller.
    auto* source_bin = full_feature->multi_bin_data_[fidx].get();
    multi_bin_data_[fidx]->CopySubrow(source_bin, used_indices, num_used_indices);
    return;
  }
  // Single shared bin for the whole group; fidx is irrelevant on this path.
  auto* source_bin = full_feature->bin_data_.get();
  bin_data_->CopySubrow(source_bin, used_indices, num_used_indices);
}
void
AddFeaturesFrom
(
const
FeatureGroup
*
other
,
int
group_id
)
{
void
AddFeaturesFrom
(
const
FeatureGroup
*
other
,
int
group_id
)
{
CHECK
(
is_multi_val_
);
CHECK
(
is_multi_val_
);
CHECK
(
other
->
is_multi_val_
);
CHECK
(
other
->
is_multi_val_
);
...
...
src/io/dataset.cpp
View file @
36531679
...
@@ -782,15 +782,36 @@ void Dataset::CopySubrow(const Dataset* fullset,
...
@@ -782,15 +782,36 @@ void Dataset::CopySubrow(const Dataset* fullset,
const
data_size_t
*
used_indices
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
need_meta_data
)
{
data_size_t
num_used_indices
,
bool
need_meta_data
)
{
CHECK_EQ
(
num_used_indices
,
num_data_
);
CHECK_EQ
(
num_used_indices
,
num_data_
);
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
std
::
vector
<
int
>
group_ids
,
subfeature_ids
;
group_ids
.
reserve
(
num_features_
);
subfeature_ids
.
reserve
(
num_features_
);
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
if
(
fullset
->
IsMultiGroup
(
group
))
{
for
(
int
sub_feature
=
0
;
sub_feature
<
fullset
->
feature_groups_
[
group
]
->
num_feature_
;
++
sub_feature
)
{
group_ids
.
emplace_back
(
group
);
subfeature_ids
.
emplace_back
(
sub_feature
);
}
}
else
{
group_ids
.
emplace_back
(
group
);
subfeature_ids
.
emplace_back
(
-
1
);
}
}
int
num_copy_tasks
=
static_cast
<
int
>
(
group_ids
.
size
());
OMP_INIT_EX
();
#pragma omp parallel for schedule(dynamic)
for
(
int
task_id
=
0
;
task_id
<
num_copy_tasks
;
++
task_id
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
feature_groups_
[
group
]
->
CopySubrow
(
fullset
->
feature_groups_
[
group
].
get
(),
int
group
=
group_ids
[
task_id
];
used_indices
,
num_used_indices
);
int
subfeature
=
subfeature_ids
[
task_id
];
feature_groups_
[
group
]
->
CopySubrowByCol
(
fullset
->
feature_groups_
[
group
].
get
(),
used_indices
,
num_used_indices
,
subfeature
);
OMP_LOOP_EX_END
();
OMP_LOOP_EX_END
();
}
}
OMP_THROW_EX
();
OMP_THROW_EX
();
if
(
need_meta_data
)
{
if
(
need_meta_data
)
{
metadata_
.
Init
(
fullset
->
metadata_
,
used_indices
,
num_used_indices
);
metadata_
.
Init
(
fullset
->
metadata_
,
used_indices
,
num_used_indices
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment