Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
73dc1bbd
Unverified
Commit
73dc1bbd
authored
Feb 25, 2020
by
Guolin Ke
Committed by
GitHub
Feb 25, 2020
Browse files
support larger entry size for multi-val bin (#2817)
parent
577d0946
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
78 additions
and
46 deletions
+78
-46
src/io/bin.cpp
src/io/bin.cpp
+37
-10
src/io/multi_val_dense_bin.hpp
src/io/multi_val_dense_bin.hpp
+3
-3
src/io/multi_val_sparse_bin.hpp
src/io/multi_val_sparse_bin.hpp
+38
-33
No files found.
src/io/bin.cpp
View file @
73dc1bbd
...
@@ -689,15 +689,42 @@ namespace LightGBM {
...
@@ -689,15 +689,42 @@ namespace LightGBM {
MultiValBin
*
MultiValBin
::
CreateMultiValSparseBin
(
data_size_t
num_data
,
MultiValBin
*
MultiValBin
::
CreateMultiValSparseBin
(
data_size_t
num_data
,
int
num_bin
,
int
num_bin
,
double
estimate_element_per_row
)
{
double
estimate_element_per_row
)
{
size_t
estimate_total_entries
=
static_cast
<
size_t
>
(
estimate_element_per_row
*
1.1
)
*
static_cast
<
size_t
>
(
num_data
);
if
(
estimate_total_entries
<=
std
::
numeric_limits
<
uint16_t
>::
max
())
{
if
(
num_bin
<=
256
)
{
if
(
num_bin
<=
256
)
{
return
new
MultiValSparseBin
<
uint
8_t
>
(
num_data
,
num_bin
,
return
new
MultiValSparseBin
<
uint
16_t
,
uint8_t
>
(
estimate_element_per_row
);
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
if
(
num_bin
<=
65536
)
{
}
else
if
(
num_bin
<=
65536
)
{
return
new
MultiValSparseBin
<
uint16_t
>
(
num_data
,
num_bin
,
return
new
MultiValSparseBin
<
uint16_t
,
uint16_t
>
(
estimate_element_per_row
);
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
{
}
else
{
return
new
MultiValSparseBin
<
uint32_t
>
(
num_data
,
num_bin
,
return
new
MultiValSparseBin
<
uint16_t
,
uint32_t
>
(
estimate_element_per_row
);
num_data
,
num_bin
,
estimate_element_per_row
);
}
}
else
if
(
estimate_total_entries
<=
std
::
numeric_limits
<
uint32_t
>::
max
())
{
if
(
num_bin
<=
256
)
{
return
new
MultiValSparseBin
<
uint32_t
,
uint8_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
if
(
num_bin
<=
65536
)
{
return
new
MultiValSparseBin
<
uint32_t
,
uint16_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
{
return
new
MultiValSparseBin
<
uint32_t
,
uint32_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
}
else
{
if
(
num_bin
<=
256
)
{
return
new
MultiValSparseBin
<
size_t
,
uint8_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
if
(
num_bin
<=
65536
)
{
return
new
MultiValSparseBin
<
size_t
,
uint16_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
else
{
return
new
MultiValSparseBin
<
size_t
,
uint32_t
>
(
num_data
,
num_bin
,
estimate_element_per_row
);
}
}
}
}
}
...
...
src/io/multi_val_dense_bin.hpp
View file @
73dc1bbd
...
@@ -131,7 +131,7 @@ class MultiValDenseBin : public MultiValBin {
...
@@ -131,7 +131,7 @@ class MultiValDenseBin : public MultiValBin {
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
auto
j_start
=
RowPtr
(
i
);
auto
j_start
=
RowPtr
(
i
);
auto
other_j_start
=
other_bin
->
RowPtr
(
used_indices
[
i
]);
auto
other_j_start
=
other_bin
->
RowPtr
(
used_indices
[
i
]);
for
(
int64_t
j
=
other_j_start
;
for
(
auto
j
=
other_j_start
;
j
<
other_bin
->
RowPtr
(
used_indices
[
i
]
+
1
);
++
j
)
{
j
<
other_bin
->
RowPtr
(
used_indices
[
i
]
+
1
);
++
j
)
{
data_
[
j
-
other_j_start
+
j_start
]
=
other_bin
->
data_
[
j
];
data_
[
j
-
other_j_start
+
j_start
]
=
other_bin
->
data_
[
j
];
}
}
...
@@ -180,8 +180,8 @@ class MultiValDenseBin : public MultiValBin {
...
@@ -180,8 +180,8 @@ class MultiValDenseBin : public MultiValBin {
}
}
}
}
inline
int64
_t
RowPtr
(
data_size_t
idx
)
const
{
inline
size
_t
RowPtr
(
data_size_t
idx
)
const
{
return
static_cast
<
int64
_t
>
(
idx
)
*
num_feature_
;
return
static_cast
<
size
_t
>
(
idx
)
*
num_feature_
;
}
}
MultiValDenseBin
<
VAL_T
>*
Clone
()
override
;
MultiValDenseBin
<
VAL_T
>*
Clone
()
override
;
...
...
src/io/multi_val_sparse_bin.hpp
View file @
73dc1bbd
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
namespace
LightGBM
{
namespace
LightGBM
{
template
<
typename
VAL_T
>
template
<
typename
INDEX_T
,
typename
VAL_T
>
class
MultiValSparseBin
:
public
MultiValBin
{
class
MultiValSparseBin
:
public
MultiValBin
{
public:
public:
explicit
MultiValSparseBin
(
data_size_t
num_data
,
int
num_bin
,
explicit
MultiValSparseBin
(
data_size_t
num_data
,
int
num_bin
,
...
@@ -24,8 +24,9 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -24,8 +24,9 @@ class MultiValSparseBin : public MultiValBin {
num_bin_
(
num_bin
),
num_bin_
(
num_bin
),
estimate_element_per_row_
(
estimate_element_per_row
)
{
estimate_element_per_row_
(
estimate_element_per_row
)
{
row_ptr_
.
resize
(
num_data_
+
1
,
0
);
row_ptr_
.
resize
(
num_data_
+
1
,
0
);
data_size_t
estimate_num_data
=
INDEX_T
estimate_num_data
=
static_cast
<
data_size_t
>
(
num_data_
*
estimate_element_per_row_
*
1.1
);
static_cast
<
INDEX_T
>
(
estimate_element_per_row_
*
1.1
)
*
static_cast
<
INDEX_T
>
(
num_data_
);
int
num_threads
=
1
;
int
num_threads
=
1
;
#pragma omp parallel
#pragma omp parallel
#pragma omp master
#pragma omp master
...
@@ -49,16 +50,18 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -49,16 +50,18 @@ class MultiValSparseBin : public MultiValBin {
void
PushOneRow
(
int
tid
,
data_size_t
idx
,
void
PushOneRow
(
int
tid
,
data_size_t
idx
,
const
std
::
vector
<
uint32_t
>&
values
)
override
{
const
std
::
vector
<
uint32_t
>&
values
)
override
{
const
int
pre_alloc_size
=
50
;
const
int
pre_alloc_size
=
50
;
row_ptr_
[
idx
+
1
]
=
static_cast
<
data_size_t
>
(
values
.
size
());
row_ptr_
[
idx
+
1
]
=
static_cast
<
INDEX_T
>
(
values
.
size
());
if
(
tid
==
0
)
{
if
(
tid
==
0
)
{
if
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
>
static_cast
<
data_size_t
>
(
data_
.
size
()))
{
if
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
>
static_cast
<
INDEX_T
>
(
data_
.
size
()))
{
data_
.
resize
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
*
pre_alloc_size
);
data_
.
resize
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
*
pre_alloc_size
);
}
}
for
(
auto
val
:
values
)
{
for
(
auto
val
:
values
)
{
data_
[
t_size_
[
tid
]
++
]
=
static_cast
<
VAL_T
>
(
val
);
data_
[
t_size_
[
tid
]
++
]
=
static_cast
<
VAL_T
>
(
val
);
}
}
}
else
{
}
else
{
if
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
>
static_cast
<
data_size_t
>
(
t_data_
[
tid
-
1
].
size
()))
{
if
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
>
static_cast
<
INDEX_T
>
(
t_data_
[
tid
-
1
].
size
()))
{
t_data_
[
tid
-
1
].
resize
(
t_size_
[
tid
]
+
t_data_
[
tid
-
1
].
resize
(
t_size_
[
tid
]
+
row_ptr_
[
idx
+
1
]
*
pre_alloc_size
);
row_ptr_
[
idx
+
1
]
*
pre_alloc_size
);
}
}
...
@@ -68,13 +71,13 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -68,13 +71,13 @@ class MultiValSparseBin : public MultiValBin {
}
}
}
}
void
MergeData
(
const
data_size_t
*
sizes
)
{
void
MergeData
(
const
INDEX_T
*
sizes
)
{
Common
::
FunctionTimer
fun_time
(
"MultiValSparseBin::MergeData"
,
global_timer
);
Common
::
FunctionTimer
fun_time
(
"MultiValSparseBin::MergeData"
,
global_timer
);
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
INDEX_T
i
=
0
;
i
<
static_cast
<
INDEX_T
>
(
num_data_
)
;
++
i
)
{
row_ptr_
[
i
+
1
]
+=
row_ptr_
[
i
];
row_ptr_
[
i
+
1
]
+=
row_ptr_
[
i
];
}
}
if
(
t_data_
.
size
()
>
0
)
{
if
(
t_data_
.
size
()
>
0
)
{
std
::
vector
<
data_size_t
>
offsets
(
1
+
t_data_
.
size
());
std
::
vector
<
INDEX_T
>
offsets
(
1
+
t_data_
.
size
());
offsets
[
0
]
=
sizes
[
0
];
offsets
[
0
]
=
sizes
[
0
];
for
(
size_t
tid
=
0
;
tid
<
t_data_
.
size
()
-
1
;
++
tid
)
{
for
(
size_t
tid
=
0
;
tid
<
t_data_
.
size
()
-
1
;
++
tid
)
{
offsets
[
tid
+
1
]
=
offsets
[
tid
]
+
sizes
[
tid
+
1
];
offsets
[
tid
+
1
]
=
offsets
[
tid
]
+
sizes
[
tid
+
1
];
...
@@ -193,14 +196,15 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -193,14 +196,15 @@ class MultiValSparseBin : public MultiValBin {
void
CopySubset
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
void
CopySubset
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
data_size_t
num_used_indices
)
override
{
auto
other_bin
=
dynamic_cast
<
const
MultiValSparseBin
<
VAL_T
>*>
(
full_bin
);
auto
other_bin
=
dynamic_cast
<
const
MultiValSparseBin
<
INDEX_T
,
VAL_T
>*>
(
full_bin
);
row_ptr_
.
resize
(
num_data_
+
1
,
0
);
row_ptr_
.
resize
(
num_data_
+
1
,
0
);
data_size_t
estimate_num_data
=
INDEX_T
estimate_num_data
=
static_cast
<
data_size_t
>
(
num_data_
*
estimate_element_per_row_
*
1.5
);
static_cast
<
INDEX_T
>
(
estimate_element_per_row_
*
1.1
)
*
static_cast
<
INDEX_T
>
(
num_data_
);
data_
.
clear
();
data_
.
clear
();
data_
.
reserve
(
estimate_num_data
);
data_
.
reserve
(
estimate_num_data
);
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
for
(
data_size_t
j
=
other_bin
->
row_ptr_
[
used_indices
[
i
]];
for
(
auto
j
=
other_bin
->
row_ptr_
[
used_indices
[
i
]];
j
<
other_bin
->
row_ptr_
[
used_indices
[
i
]
+
1
];
++
j
)
{
j
<
other_bin
->
row_ptr_
[
used_indices
[
i
]
+
1
];
++
j
)
{
data_
.
push_back
(
other_bin
->
data_
[
j
]);
data_
.
push_back
(
other_bin
->
data_
[
j
]);
}
}
...
@@ -211,7 +215,7 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -211,7 +215,7 @@ class MultiValSparseBin : public MultiValBin {
MultiValBin
*
CreateLike
(
int
num_bin
,
int
,
MultiValBin
*
CreateLike
(
int
num_bin
,
int
,
double
estimate_element_per_row
)
const
override
{
double
estimate_element_per_row
)
const
override
{
return
new
MultiValSparseBin
<
VAL_T
>
(
num_data_
,
num_bin
,
return
new
MultiValSparseBin
<
INDEX_T
,
VAL_T
>
(
num_data_
,
num_bin
,
estimate_element_per_row
);
estimate_element_per_row
);
}
}
...
@@ -219,16 +223,16 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -219,16 +223,16 @@ class MultiValSparseBin : public MultiValBin {
double
estimate_element_per_row
)
override
{
double
estimate_element_per_row
)
override
{
num_bin_
=
num_bin
;
num_bin_
=
num_bin
;
estimate_element_per_row_
=
estimate_element_per_row
;
estimate_element_per_row_
=
estimate_element_per_row
;
data_size_t
estimate_num_data
=
INDEX_T
estimate_num_data
=
static_cast
<
data_size_t
>
(
num_data_
*
estimate_element_per_row_
*
1.1
);
static_cast
<
INDEX_T
>
(
estimate_element_per_row_
*
1.1
)
*
static_cast
<
INDEX_T
>
(
num_data_
);
size_t
npart
=
1
+
t_data_
.
size
();
size_t
npart
=
1
+
t_data_
.
size
();
data_size_t
avg_num_data
=
INDEX_T
avg_num_data
=
static_cast
<
INDEX_T
>
(
estimate_num_data
/
npart
);
static_cast
<
data_size_t
>
(
estimate_num_data
/
npart
);
if
(
static_cast
<
INDEX_T
>
(
data_
.
size
())
<
avg_num_data
)
{
if
(
static_cast
<
data_size_t
>
(
data_
.
size
())
<
avg_num_data
)
{
data_
.
resize
(
avg_num_data
,
0
);
data_
.
resize
(
avg_num_data
,
0
);
}
}
for
(
size_t
i
=
0
;
i
<
t_data_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
t_data_
.
size
();
++
i
)
{
if
(
static_cast
<
data_size_t
>
(
t_data_
[
i
].
size
())
<
avg_num_data
)
{
if
(
static_cast
<
INDEX_T
>
(
t_data_
[
i
].
size
())
<
avg_num_data
)
{
t_data_
[
i
].
resize
(
avg_num_data
,
0
);
t_data_
[
i
].
resize
(
avg_num_data
,
0
);
}
}
}
}
...
@@ -239,27 +243,27 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -239,27 +243,27 @@ class MultiValSparseBin : public MultiValBin {
const
std
::
vector
<
uint32_t
>&
upper
,
const
std
::
vector
<
uint32_t
>&
upper
,
const
std
::
vector
<
uint32_t
>&
delta
)
override
{
const
std
::
vector
<
uint32_t
>&
delta
)
override
{
const
auto
other
=
const
auto
other
=
reinterpret_cast
<
const
MultiValSparseBin
<
VAL_T
>*>
(
full_bin
);
reinterpret_cast
<
const
MultiValSparseBin
<
INDEX_T
,
VAL_T
>*>
(
full_bin
);
int
n_block
=
1
;
int
n_block
=
1
;
data_size_t
block_size
=
num_data_
;
data_size_t
block_size
=
num_data_
;
Threading
::
BlockInfo
<
data_size_t
>
(
static_cast
<
int
>
(
t_data_
.
size
()
+
1
),
Threading
::
BlockInfo
<
data_size_t
>
(
static_cast
<
int
>
(
t_data_
.
size
()
+
1
),
num_data_
,
1024
,
&
n_block
,
&
block_size
);
num_data_
,
1024
,
&
n_block
,
&
block_size
);
std
::
vector
<
data_size_t
>
sizes
(
t_data_
.
size
()
+
1
,
0
);
std
::
vector
<
INDEX_T
>
sizes
(
t_data_
.
size
()
+
1
,
0
);
const
int
pre_alloc_size
=
50
;
const
int
pre_alloc_size
=
50
;
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for
(
int
tid
=
0
;
tid
<
n_block
;
++
tid
)
{
for
(
int
tid
=
0
;
tid
<
n_block
;
++
tid
)
{
data_size_t
start
=
tid
*
block_size
;
data_size_t
start
=
tid
*
block_size
;
data_size_t
end
=
std
::
min
(
num_data_
,
start
+
block_size
);
data_size_t
end
=
std
::
min
(
num_data_
,
start
+
block_size
);
auto
&
buf
=
(
tid
==
0
)
?
data_
:
t_data_
[
tid
-
1
];
auto
&
buf
=
(
tid
==
0
)
?
data_
:
t_data_
[
tid
-
1
];
data_size_t
size
=
0
;
INDEX_T
size
=
0
;
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
const
auto
j_start
=
other
->
RowPtr
(
i
);
const
auto
j_start
=
other
->
RowPtr
(
i
);
const
auto
j_end
=
other
->
RowPtr
(
i
+
1
);
const
auto
j_end
=
other
->
RowPtr
(
i
+
1
);
if
(
size
+
(
j_end
-
j_start
)
>
static_cast
<
data_size_t
>
(
buf
.
size
()))
{
if
(
size
+
(
j_end
-
j_start
)
>
static_cast
<
INDEX_T
>
(
buf
.
size
()))
{
buf
.
resize
(
size
+
(
j_end
-
j_start
)
*
pre_alloc_size
);
buf
.
resize
(
size
+
(
j_end
-
j_start
)
*
pre_alloc_size
);
}
}
int
k
=
0
;
int
k
=
0
;
const
data_size_t
pre_size
=
size
;
const
auto
pre_size
=
size
;
for
(
auto
j
=
j_start
;
j
<
j_end
;
++
j
)
{
for
(
auto
j
=
j_start
;
j
<
j_end
;
++
j
)
{
auto
val
=
other
->
data_
[
j
];
auto
val
=
other
->
data_
[
j
];
while
(
val
>=
upper
[
k
])
{
while
(
val
>=
upper
[
k
])
{
...
@@ -276,22 +280,23 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -276,22 +280,23 @@ class MultiValSparseBin : public MultiValBin {
MergeData
(
sizes
.
data
());
MergeData
(
sizes
.
data
());
}
}
inline
data_size_t
RowPtr
(
data_size_t
idx
)
const
{
return
row_ptr_
[
idx
];
}
inline
INDEX_T
RowPtr
(
data_size_t
idx
)
const
{
return
row_ptr_
[
idx
];
}
MultiValSparseBin
<
VAL_T
>*
Clone
()
override
;
MultiValSparseBin
<
INDEX_T
,
VAL_T
>*
Clone
()
override
;
private:
private:
data_size_t
num_data_
;
data_size_t
num_data_
;
int
num_bin_
;
int
num_bin_
;
double
estimate_element_per_row_
;
double
estimate_element_per_row_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
32
>>
data_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
32
>>
data_
;
std
::
vector
<
data_size_t
,
Common
::
AlignmentAllocator
<
data_size_t
,
32
>>
std
::
vector
<
INDEX_T
,
Common
::
AlignmentAllocator
<
INDEX_T
,
32
>>
row_ptr_
;
row_ptr_
;
std
::
vector
<
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
32
>>>
std
::
vector
<
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
32
>>>
t_data_
;
t_data_
;
std
::
vector
<
data_size_t
>
t_size_
;
std
::
vector
<
INDEX_T
>
t_size_
;
MultiValSparseBin
<
VAL_T
>
(
const
MultiValSparseBin
<
VAL_T
>&
other
)
MultiValSparseBin
<
INDEX_T
,
VAL_T
>
(
const
MultiValSparseBin
<
INDEX_T
,
VAL_T
>&
other
)
:
num_data_
(
other
.
num_data_
),
:
num_data_
(
other
.
num_data_
),
num_bin_
(
other
.
num_bin_
),
num_bin_
(
other
.
num_bin_
),
estimate_element_per_row_
(
other
.
estimate_element_per_row_
),
estimate_element_per_row_
(
other
.
estimate_element_per_row_
),
...
@@ -299,9 +304,9 @@ class MultiValSparseBin : public MultiValBin {
...
@@ -299,9 +304,9 @@ class MultiValSparseBin : public MultiValBin {
row_ptr_
(
other
.
row_ptr_
)
{}
row_ptr_
(
other
.
row_ptr_
)
{}
};
};
template
<
typename
VAL_T
>
template
<
typename
INDEX_T
,
typename
VAL_T
>
MultiValSparseBin
<
VAL_T
>*
MultiValSparseBin
<
VAL_T
>::
Clone
()
{
MultiValSparseBin
<
INDEX_T
,
VAL_T
>*
MultiValSparseBin
<
INDEX_T
,
VAL_T
>::
Clone
()
{
return
new
MultiValSparseBin
<
VAL_T
>
(
*
this
);
return
new
MultiValSparseBin
<
INDEX_T
,
VAL_T
>
(
*
this
);
}
}
}
// namespace LightGBM
}
// namespace LightGBM
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment