Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
bcad692e
Unverified
Commit
bcad692e
authored
Mar 08, 2020
by
Guolin Ke
Committed by
GitHub
Mar 08, 2020
Browse files
Speed-up "Split" and some code refactorings (#2883)
* commit * fix msvc * fix format
parent
1a48fd26
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
703 additions
and
838 deletions
+703
-838
include/LightGBM/bin.h
include/LightGBM/bin.h
+25
-90
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+8
-5
include/LightGBM/feature_group.h
include/LightGBM/feature_group.h
+29
-14
include/LightGBM/meta.h
include/LightGBM/meta.h
+3
-0
src/io/bin.cpp
src/io/bin.cpp
+8
-8
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+311
-139
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+0
-354
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+237
-133
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+1
-1
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+61
-69
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+20
-21
windows/LightGBM.vcxproj
windows/LightGBM.vcxproj
+0
-1
windows/LightGBM.vcxproj.filters
windows/LightGBM.vcxproj.filters
+0
-3
No files found.
include/LightGBM/bin.h
View file @
bcad692e
...
...
@@ -218,61 +218,6 @@ class BinMapper {
uint32_t
most_freq_bin_
;
};
/*!
* \brief Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins.
* There are 2 advantages to using an ordered bin:
* 1. It groups the data by leaf to improve the cache hit rate.
* 2. It only stores the non-zero bins, which can speed up histogram construction for sparse features.
* However, it brings an additional cost: the bins need to be re-ordered after every split, which is expensive for dense features.
* So ordered bins are only used for sparse situations.
*/
class
OrderedBin
{
public:
/*! \brief Virtual destructor, so implementations can be destroyed through this interface */
virtual
~
OrderedBin
()
{}
/*!
* \brief Initialization logic.
* \param used_indices If used_indices.size() == 0, all data is used; otherwise used_indices[i] == true means the i-th data is used
(this logic was built for bagging)
* \param num_leaves Number of leaves in this iteration
*/
virtual
void
Init
(
const
char
*
used_indices
,
data_size_t
num_leaves
)
=
0
;
/*!
* \brief Construct a histogram using this bin (gradients and hessians).
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians,
* because it is hard to know the relative index within one leaf for a sparse bin, since zero bins are skipped.
* \param leaf The leaf whose data is used for construction
* \param gradients Gradients. Note: not ordered by leaf
* \param hessians Hessians. Note: not ordered by leaf
* \param out Output result
*/
virtual
void
ConstructHistogram
(
int
leaf
,
const
score_t
*
gradients
,
const
score_t
*
hessians
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Construct a histogram using this bin (gradients only, no hessians).
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians,
* because it is hard to know the relative index within one leaf for a sparse bin, since zero bins are skipped.
* \param leaf The leaf whose data is used for construction
* \param gradients Gradients. Note: not ordered by leaf
* \param out Output result
*/
virtual
void
ConstructHistogram
(
int
leaf
,
const
score_t
*
gradients
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Split the current bin and re-order it by leaf
* \param leaf The leaf to split
* \param right_leaf The new leaf index created by this split
* \param is_in_leaf is_in_leaf[i] == mark means the i-th data will be on the left leaf after the split
* \param mark The marker value compared against is_in_leaf[i] to select the left leaf
*/
virtual
void
Split
(
int
leaf
,
int
right_leaf
,
const
char
*
is_in_leaf
,
char
mark
)
=
0
;
/*! \brief NOTE(review): presumably the number of non-zero entries stored for the given leaf -- confirm against the implementations */
virtual
data_size_t
NonZeroCount
(
int
leaf
)
const
=
0
;
};
/*! \brief Iterator for one bin column */
class
BinIterator
{
public:
...
...
@@ -382,43 +327,33 @@ class Bin {
virtual
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param default_bin default bin for feature value 0
* \param most_freq_bin
* \param missing_type missing type
* \param default_left missing bin will go to left child
* \param threshold The split threshold.
* \param data_indices Used data indices. After calling this function, the less-than-or-equal data indices will be stored in this object.
* \param num_data Number of used data
* \param lte_indices After calling this function, the less-than-or-equal data indices will be stored in this object.
* \param gt_indices After calling this function, the greater-than data indices will be stored in this object.
* \return The number of less than or equal data.
*/
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
Split
(
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
SplitCategorical
(
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param most_freq_bin
* \param threshold The split threshold.
* \param num_threshold Number of threshold
* \param data_indices Used data indices. After calling this function, the less-than-or-equal data indices will be stored in this object.
* \param num_data Number of used data
* \param lte_indices After calling this function, the less-than-or-equal data indices will be stored in this object.
* \param gt_indices After calling this function, the greater-than data indices will be stored in this object.
* \return The number of less than or equal data.
*/
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
/*!
* \brief After pushed all feature data, call this could have better refactor for bin data
...
...
include/LightGBM/dataset.h
View file @
bcad692e
...
...
@@ -535,13 +535,16 @@ class Dataset {
void
FixHistogram
(
int
feature_idx
,
double
sum_gradient
,
double
sum_hessian
,
hist_t
*
data
)
const
;
inline
data_size_t
Split
(
int
feature
,
const
uint32_t
*
threshold
,
int
num_threshold
,
bool
default_left
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
inline
data_size_t
Split
(
int
feature
,
const
uint32_t
*
threshold
,
int
num_threshold
,
bool
default_left
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
const
int
group
=
feature2group_
[
feature
];
const
int
sub_feature
=
feature2subfeature_
[
feature
];
return
feature_groups_
[
group
]
->
Split
(
sub_feature
,
threshold
,
num_threshold
,
default_left
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
feature_groups_
[
group
]
->
Split
(
sub_feature
,
threshold
,
num_threshold
,
default_left
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
inline
int
SubFeatureBinOffset
(
int
i
)
const
{
...
...
include/LightGBM/feature_group.h
View file @
bcad692e
...
...
@@ -228,13 +228,11 @@ class FeatureGroup {
return
bin_data_
->
GetIterator
(
min_bin
,
max_bin
,
most_freq_bin
);
}
inline
data_size_t
Split
(
int
sub_feature
,
const
uint32_t
*
threshold
,
int
num_threshold
,
bool
default_left
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
inline
data_size_t
Split
(
int
sub_feature
,
const
uint32_t
*
threshold
,
int
num_threshold
,
bool
default_left
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
uint32_t
default_bin
=
bin_mappers_
[
sub_feature
]
->
GetDefaultBin
();
uint32_t
most_freq_bin
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
();
if
(
!
is_multi_val_
)
{
...
...
@@ -242,21 +240,38 @@ class FeatureGroup {
uint32_t
max_bin
=
bin_offsets_
[
sub_feature
+
1
]
-
1
;
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
if
(
num_feature_
==
1
)
{
return
bin_data_
->
Split
(
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
else
{
return
bin_data_
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
if
(
num_feature_
==
1
)
{
return
bin_data_
->
SplitCategorical
(
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
return
bin_data_
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
}
else
{
int
addi
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
()
==
0
?
0
:
1
;
uint32_t
min_bin
=
1
;
uint32_t
max_bin
=
bin_mappers_
[
sub_feature
]
->
num_bin
()
-
1
+
addi
;
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
return
multi_bin_data_
[
sub_feature
]
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
multi_bin_data_
[
sub_feature
]
->
Split
(
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
return
multi_bin_data_
[
sub_feature
]
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
multi_bin_data_
[
sub_feature
]
->
SplitCategorical
(
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
}
...
...
include/LightGBM/meta.h
View file @
bcad692e
...
...
@@ -75,6 +75,9 @@ const int kAlignedSize = 32;
// Round t up to the next multiple of kAlignedSize. The whole expansion is
// parenthesized so the macro composes safely inside larger expressions
// (e.g. `2 * SIZE_ALIGNED(n)` or `x % SIZE_ALIGNED(n)`).
#define SIZE_ALIGNED(t) (((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize)
// Refer to https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=vs-2019
#pragma warning(disable : 4127)
}
// namespace LightGBM
#endif // LightGBM_META_H_
src/io/bin.cpp
View file @
bcad692e
...
...
@@ -14,7 +14,6 @@
#include <cstring>
#include "dense_bin.hpp"
#include "dense_nbits_bin.hpp"
#include "multi_val_dense_bin.hpp"
#include "multi_val_sparse_bin.hpp"
#include "sparse_bin.hpp"
...
...
@@ -633,9 +632,10 @@ namespace LightGBM {
return
ret
;
}
template
class
DenseBin
<
uint8_t
>;
template
class
DenseBin
<
uint16_t
>;
template
class
DenseBin
<
uint32_t
>;
template
class
DenseBin
<
uint8_t
,
true
>;
template
class
DenseBin
<
uint8_t
,
false
>;
template
class
DenseBin
<
uint16_t
,
false
>;
template
class
DenseBin
<
uint32_t
,
false
>;
template
class
SparseBin
<
uint8_t
>;
template
class
SparseBin
<
uint16_t
>;
...
...
@@ -647,13 +647,13 @@ namespace LightGBM {
Bin
*
Bin
::
CreateDenseBin
(
data_size_t
num_data
,
int
num_bin
)
{
if
(
num_bin
<=
16
)
{
return
new
Dense
4bitsBin
(
num_data
);
return
new
Dense
Bin
<
uint8_t
,
true
>
(
num_data
);
}
else
if
(
num_bin
<=
256
)
{
return
new
DenseBin
<
uint8_t
>
(
num_data
);
return
new
DenseBin
<
uint8_t
,
false
>
(
num_data
);
}
else
if
(
num_bin
<=
65536
)
{
return
new
DenseBin
<
uint16_t
>
(
num_data
);
return
new
DenseBin
<
uint16_t
,
false
>
(
num_data
);
}
else
{
return
new
DenseBin
<
uint32_t
>
(
num_data
);
return
new
DenseBin
<
uint32_t
,
false
>
(
num_data
);
}
}
...
...
src/io/dense_bin.hpp
View file @
bcad692e
This diff is collapsed.
Click to expand it.
src/io/dense_nbits_bin.hpp
deleted
100644 → 0
View file @
1a48fd26
/*!
* Copyright (c) 2017 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#include <LightGBM/bin.h>
#include <cstdint>
#include <cstring>
#include <vector>
namespace
LightGBM
{
class
Dense4bitsBin
;
/*!
 * \brief Iterator over the values of one feature stored in a Dense4bitsBin.
 *        The bin bounds and the most frequent bin are narrowed to uint8_t on construction.
 */
class Dense4bitsBinIterator : public BinIterator {
 public:
  explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data, uint32_t min_bin,
                                 uint32_t max_bin, uint32_t most_freq_bin)
      : bin_data_(bin_data),
        min_bin_(static_cast<uint8_t>(min_bin)),
        max_bin_(static_cast<uint8_t>(max_bin)),
        most_freq_bin_(static_cast<uint8_t>(most_freq_bin)),
        // offset_ is declared after most_freq_bin_, so the member is already set here.
        offset_(most_freq_bin_ == 0 ? 1 : 0) {}

  /*! \brief Raw 4-bit value stored for row idx */
  inline uint32_t RawGet(data_size_t idx) override;
  /*! \brief Value for row idx relative to [min_bin_, max_bin_], or most_freq_bin_ when out of range */
  inline uint32_t Get(data_size_t idx) override;
  /*! \brief Nothing to reset for this iterator */
  inline void Reset(data_size_t) override {}

 private:
  const Dense4bitsBin* bin_data_;
  uint8_t min_bin_;
  uint8_t max_bin_;
  uint8_t most_freq_bin_;
  /*! \brief 1 when most_freq_bin_ is 0, otherwise 0; added to in-range results of Get() */
  uint8_t offset_;
};
class
Dense4bitsBin
:
public
Bin
{
public:
friend
Dense4bitsBinIterator
;
/*!
 * \brief Allocate zero-filled storage for num_data 4-bit values, packed two per byte.
 * \param num_data Number of values this bin holds
 */
explicit Dense4bitsBin(data_size_t num_data) : num_data_(num_data) {
  const int num_bytes = (num_data_ + 1) / 2;
  data_.resize(num_bytes, static_cast<uint8_t>(0));
  // buf_ has the same length as data_; Push() stages odd rows in it.
  buf_.assign(num_bytes, static_cast<uint8_t>(0));
}

/*! \brief Nothing to release explicitly; the vectors clean up after themselves */
~Dense4bitsBin() {}
/*!
 * \brief Record the value of row idx.
 *        Even rows overwrite the byte in data_; odd rows overwrite the byte in buf_
 *        with the value pre-shifted into the high nibble.
 * \param idx Row index
 * \param value Bin value; only its low 8 bits are kept before shifting
 */
void Push(int, data_size_t idx, uint32_t value) override {
  const int byte_pos = idx >> 1;
  const uint8_t narrowed = static_cast<uint8_t>(value);
  const bool is_even_row = (idx & 1) == 0;
  if (is_even_row) {
    data_[byte_pos] = narrowed;
  } else {
    buf_[byte_pos] = static_cast<uint8_t>(narrowed << 4);
  }
}
/*!
 * \brief Change the number of rows; a no-op when the size is unchanged.
 * \param num_data New number of rows
 */
void ReSize(data_size_t num_data) override {
  if (num_data_ == num_data) {
    return;
  }
  num_data_ = num_data;
  // NOTE(review): only data_ is resized here; buf_ keeps its previous length -- confirm this is intended.
  const int num_bytes = (num_data_ + 1) / 2;
  data_.resize(num_bytes);
}
/*! \brief Create an iterator over this bin for the given bin range; the definition appears later in this header, outside the class body */
inline
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
template
<
bool
USE_INDICES
,
bool
USE_PREFETCH
,
bool
USE_HESSIAN
>
void
ConstructHistogramInner
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
{
data_size_t
i
=
start
;
hist_t
*
grad
=
out
;
hist_t
*
hess
=
out
+
1
;
hist_cnt_t
*
cnt
=
reinterpret_cast
<
hist_cnt_t
*>
(
hess
);
if
(
USE_PREFETCH
)
{
const
data_size_t
pf_offset
=
64
;
const
data_size_t
pf_end
=
end
-
pf_offset
;
for
(;
i
<
pf_end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
pf_idx
=
USE_INDICES
?
data_indices
[
i
+
pf_offset
]
:
i
+
pf_offset
;
PREFETCH_T0
(
data_
.
data
()
+
(
pf_idx
>>
1
));
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
uint8_t
ti
=
static_cast
<
uint8_t
>
(
bin
)
<<
1
;
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
}
else
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
++
cnt
[
ti
];
}
}
}
for
(;
i
<
end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
uint8_t
ti
=
static_cast
<
uint8_t
>
(
bin
)
<<
1
;
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
}
else
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
++
cnt
[
ti
];
}
}
}
/*! \brief Histogram over the rows listed in data_indices[start, end), with gradients and hessians */
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                        data_size_t end, const score_t* ordered_gradients,
                        const score_t* ordered_hessians, hist_t* out) const override {
  const bool use_indices = true;
  const bool use_prefetch = true;
  const bool use_hessian = true;
  ConstructHistogramInner<use_indices, use_prefetch, use_hessian>(
      data_indices, start, end, ordered_gradients, ordered_hessians, out);
}
/*! \brief Histogram over the contiguous rows [start, end), with gradients and hessians */
void ConstructHistogram(data_size_t start, data_size_t end,
                        const score_t* ordered_gradients,
                        const score_t* ordered_hessians, hist_t* out) const override {
  const bool use_indices = false;
  const bool use_prefetch = false;
  const bool use_hessian = true;
  ConstructHistogramInner<use_indices, use_prefetch, use_hessian>(
      nullptr, start, end, ordered_gradients, ordered_hessians, out);
}
/*! \brief Histogram over the rows listed in data_indices[start, end), gradients only */
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                        data_size_t end, const score_t* ordered_gradients,
                        hist_t* out) const override {
  const bool use_indices = true;
  const bool use_prefetch = true;
  const bool use_hessian = false;
  ConstructHistogramInner<use_indices, use_prefetch, use_hessian>(
      data_indices, start, end, ordered_gradients, nullptr, out);
}
/*! \brief Histogram over the contiguous rows [start, end), gradients only */
void ConstructHistogram(data_size_t start, data_size_t end,
                        const score_t* ordered_gradients, hist_t* out) const override {
  const bool use_indices = false;
  const bool use_prefetch = false;
  const bool use_hessian = false;
  ConstructHistogramInner<use_indices, use_prefetch, use_hessian>(
      nullptr, start, end, ordered_gradients, nullptr, out);
}
/*!
 * \brief Partition the rows in data_indices into lte_indices / gt_indices by a numerical threshold.
 *        Stored codes outside [min_bin, max_bin], or equal to the most frequent bin's code,
 *        are routed as a group; one further "special" code (the last bin for NaN missing
 *        handling, the default bin otherwise) is routed to the missing side.
 * \param min_bin Smallest stored code of this feature
 * \param max_bin Largest stored code of this feature
 * \param default_bin Feature-local default bin
 * \param most_freq_bin Feature-local most frequent bin
 * \param missing_type How missing values are represented
 * \param default_left Whether missing values go to the left (lte) side
 * \param threshold Feature-local split threshold
 * \param data_indices Row indices to partition
 * \param num_data Number of entries in data_indices
 * \param lte_indices Output buffer for the left side
 * \param gt_indices Output buffer for the right side
 * \return Number of indices written to lte_indices
 */
data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  data_size_t* data_indices, data_size_t num_data,
                  data_size_t* lte_indices, data_size_t* gt_indices) const override {
  if (num_data <= 0) {
    return 0;
  }

  // Feature-local bins are shifted by min_bin, and by one less when bin 0 is the most frequent bin.
  const uint8_t adjust = (most_freq_bin == 0) ? 1 : 0;
  const uint8_t lo = static_cast<uint8_t>(min_bin);
  const uint8_t hi = static_cast<uint8_t>(max_bin);
  const uint8_t cut =
      static_cast<uint8_t>(static_cast<uint8_t>(threshold + min_bin) - adjust);
  const uint8_t zero_code =
      static_cast<uint8_t>(static_cast<uint8_t>(min_bin + default_bin) - adjust);
  const uint8_t freq_code =
      static_cast<uint8_t>(static_cast<uint8_t>(min_bin + most_freq_bin) - adjust);

  data_size_t lte_count = 0;
  data_size_t gt_count = 0;

  // Destination for rows holding the most frequent bin (or a code outside this feature's range).
  const bool freq_goes_left = (most_freq_bin <= threshold);
  data_size_t* const default_indices = freq_goes_left ? lte_indices : gt_indices;
  data_size_t* const default_count = freq_goes_left ? &lte_count : &gt_count;

  // Destination for "missing" rows, plus the code that identifies them when it is
  // distinct from the most frequent bin's code.
  bool missing_goes_left;
  bool has_special_code;
  uint8_t special_code;
  if (missing_type == MissingType::NaN) {
    missing_goes_left = default_left;
    has_special_code = (freq_code != hi);
    special_code = hi;
  } else {
    missing_goes_left =
        (default_left && missing_type == MissingType::Zero) ||
        (default_bin <= threshold && missing_type != MissingType::Zero);
    has_special_code = (default_bin != most_freq_bin);
    special_code = zero_code;
  }
  data_size_t* const missing_default_indices = missing_goes_left ? lte_indices : gt_indices;
  data_size_t* const missing_default_count = missing_goes_left ? &lte_count : &gt_count;

  if (!has_special_code) {
    // The missing code coincides with the most frequent bin: one combined check.
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t row = data_indices[i];
      const uint8_t code = (data_[row >> 1] >> ((row & 1) << 2)) & 0xf;
      if (code < lo || code > hi || code == freq_code) {
        missing_default_indices[(*missing_default_count)++] = row;
      } else if (code > cut) {
        gt_indices[gt_count++] = row;
      } else {
        lte_indices[lte_count++] = row;
      }
    }
  } else {
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t row = data_indices[i];
      const uint8_t code = (data_[row >> 1] >> ((row & 1) << 2)) & 0xf;
      if (code == special_code) {
        missing_default_indices[(*missing_default_count)++] = row;
      } else if (code < lo || code > hi || code == freq_code) {
        default_indices[(*default_count)++] = row;
      } else if (code > cut) {
        gt_indices[gt_count++] = row;
      } else {
        lte_indices[lte_count++] = row;
      }
    }
  }
  return lte_count;
}
/*!
 * \brief Partition the rows in data_indices by membership of their bin in a bitset.
 *        Codes outside [min_bin, max_bin] follow the side chosen for most_freq_bin.
 * \param min_bin Smallest stored code of this feature
 * \param max_bin Largest stored code of this feature
 * \param most_freq_bin Feature-local most frequent bin
 * \param threshold Bitset words; bins found in it go to the left (lte) side
 * \param num_threahold Number of words in threshold
 * \param data_indices Row indices to partition
 * \param num_data Number of entries in data_indices
 * \param lte_indices Output buffer for the left side
 * \param gt_indices Output buffer for the right side
 * \return Number of indices written to lte_indices
 */
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                             uint32_t most_freq_bin, const uint32_t* threshold,
                             int num_threahold, data_size_t* data_indices,
                             data_size_t num_data, data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  if (num_data <= 0) {
    return 0;
  }
  data_size_t lte_count = 0;
  data_size_t gt_count = 0;

  const bool freq_in_set = Common::FindInBitset(threshold, num_threahold, most_freq_bin);
  data_size_t* const default_indices = freq_in_set ? lte_indices : gt_indices;
  data_size_t* const default_count = freq_in_set ? &lte_count : &gt_count;

  for (data_size_t i = 0; i < num_data; ++i) {
    const data_size_t row = data_indices[i];
    const uint32_t code = (data_[row >> 1] >> ((row & 1) << 2)) & 0xf;
    const bool in_range = (code >= min_bin) && (code <= max_bin);
    if (!in_range) {
      default_indices[(*default_count)++] = row;
      continue;
    }
    // NOTE(review): unlike Split() and the iterator's Get(), no adjustment is applied
    // here for most_freq_bin == 0 -- confirm whether that case can occur for this path.
    if (Common::FindInBitset(threshold, num_threahold, code - min_bin)) {
      lte_indices[lte_count++] = row;
    } else {
      gt_indices[gt_count++] = row;
    }
  }
  return lte_count;
}
/*! \brief Number of rows held by this bin */
data_size_t num_data() const override { return num_data_; }
/*!
 * \brief Merge the bytes staged in buf_ into data_ with a bitwise OR, then drop buf_.
 *        Does nothing when buf_ is already empty.
 */
void FinishLoad() override {
  if (!buf_.empty()) {
    const int num_bytes = (num_data_ + 1) / 2;
    int pos = 0;
    while (pos < num_bytes) {
      data_[pos] |= buf_[pos];
      ++pos;
    }
    buf_.clear();
  }
}
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
const
uint8_t
*
mem_data
=
reinterpret_cast
<
const
uint8_t
*>
(
memory
);
if
(
!
local_used_indices
.
empty
())
{
const
data_size_t
rest
=
num_data_
&
1
;
for
(
int
i
=
0
;
i
<
num_data_
-
rest
;
i
+=
2
)
{
// get old bins
data_size_t
idx
=
local_used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
((
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
local_used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
((
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
// add
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
local_used_indices
[
num_data_
-
1
];
data_
[
num_data_
>>
1
]
=
(
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
else
{
for
(
size_t
i
=
0
;
i
<
data_
.
size
();
++
i
)
{
data_
[
i
]
=
mem_data
[
i
];
}
}
}
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
auto
other_bin
=
dynamic_cast
<
const
Dense4bitsBin
*>
(
full_bin
);
const
data_size_t
rest
=
num_used_indices
&
1
;
for
(
int
i
=
0
;
i
<
num_used_indices
-
rest
;
i
+=
2
)
{
data_size_t
idx
=
used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
((
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
((
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
used_indices
[
num_used_indices
-
1
];
data_
[
num_used_indices
>>
1
]
=
(
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
/*!
 * \brief Write the packed bytes of data_ to writer.
 * \param writer Destination; the result of its Write call is not inspected
 */
void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
  const size_t num_bytes = sizeof(uint8_t) * data_.size();
  writer->Write(data_.data(), num_bytes);
}
/*! \brief Number of bytes occupied by the packed data, i.e. what SaveBinaryToFile writes */
size_t SizesInByte() const override {
  const size_t num_bytes = sizeof(uint8_t) * data_.size();
  return num_bytes;
}
/*! \brief Return a copy of this bin allocated with new, made via the copy constructor */
Dense4bitsBin* Clone() override {
  Dense4bitsBin* duplicate = new Dense4bitsBin(*this);
  return duplicate;
}
protected:
/*! \brief Copy constructor used by Clone(); copies the row count and both byte vectors */
Dense4bitsBin(const Dense4bitsBin& other)
    : num_data_(other.num_data_),
      data_(other.data_),
      buf_(other.buf_) {}
/*! \brief Number of rows covered by this bin */
data_size_t
num_data_
;
/*! \brief Packed storage, two 4-bit values per byte: even rows in the low nibble, odd rows in the high nibble */
std
::
vector
<
uint8_t
,
Common
::
AlignmentAllocator
<
uint8_t
,
kAlignedSize
>>
data_
;
/*! \brief Staging area for the odd-row nibbles written by Push(); OR-ed into data_ and cleared by FinishLoad() */
std
::
vector
<
uint8_t
>
buf_
;
};
/*!
 * \brief Value of row idx relative to this iterator's bin range.
 * \return code - min_bin_ + offset_ when the stored code lies in [min_bin_, max_bin_],
 *         otherwise most_freq_bin_
 */
uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
  const int nibble_shift = (idx & 1) << 2;
  const int code = (bin_data_->data_[idx >> 1] >> nibble_shift) & 0xf;
  const bool out_of_range = (code < min_bin_) || (code > max_bin_);
  if (out_of_range) {
    return most_freq_bin_;
  }
  return code - min_bin_ + offset_;
}
/*! \brief Stored 4-bit code of row idx, without any range translation */
uint32_t Dense4bitsBinIterator::RawGet(data_size_t idx) {
  const int nibble_shift = (idx & 1) << 2;
  const int packed = bin_data_->data_[idx >> 1];
  return (packed >> nibble_shift) & 0xf;
}
/*! \brief Create a new Dense4bitsBinIterator over this bin for the given bin range (allocated with new) */
inline BinIterator* Dense4bitsBin::GetIterator(uint32_t min_bin, uint32_t max_bin,
                                               uint32_t most_freq_bin) const {
  BinIterator* iterator =
      new Dense4bitsBinIterator(this, min_bin, max_bin, most_freq_bin);
  return iterator;
}
}
// namespace LightGBM
#endif // LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
src/io/sparse_bin.hpp
View file @
bcad692e
This diff is collapsed.
Click to expand it.
src/treelearner/data_partition.hpp
View file @
bcad692e
...
...
@@ -106,7 +106,7 @@ class DataPartition {
const
data_size_t
begin
=
leaf_begin_
[
leaf
];
const
data_size_t
cnt
=
leaf_count_
[
leaf
];
auto
left_start
=
indices_
.
data
()
+
begin
;
auto
left_cnt
=
runner_
.
Run
<
false
>
(
const
auto
left_cnt
=
runner_
.
Run
<
false
>
(
cnt
,
[
=
](
int
,
data_size_t
cur_start
,
data_size_t
cur_cnt
,
data_size_t
*
left
,
data_size_t
*
right
)
{
...
...
src/treelearner/feature_histogram.hpp
View file @
bcad692e
...
...
@@ -144,72 +144,67 @@ class FeatureHistogram {
/*!
 * \brief Pick the numerical best-threshold routine and store it in find_best_threshold_fun_.
 *        The choice depends on meta_->num_bin and meta_->missing_type:
 *        - more than 2 bins with MissingType::Zero or MissingType::NaN: two sequential
 *          scans with the first trailing template flag set to true, then false;
 *        - otherwise: a single scan; for MissingType::NaN the result is additionally
 *          forced to default_left = false.
 *        The helper macros below only shorten the repeated argument lists and are all
 *        undefined again before the function ends.
 */
template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
void FuncForNumricalL2() {
#define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT
#define LAMBDA_ARGUMENTS                                         \
  double sum_gradient, double sum_hessian, data_size_t num_data, \
      const ConstraintEntry &constraints, SplitInfo *output
#define BEFORE_ARGUMENTS sum_gradient, sum_hessian, output, &rand_threshold
#define FUNC_ARGUMENTS                                                      \
  sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, output, \
      rand_threshold

  if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
    if (meta_->missing_type == MissingType::Zero) {
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, true, false>(
            FUNC_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, false, true, false>(
            FUNC_ARGUMENTS);
      };
    } else {
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, true>(
            FUNC_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, false, false, true>(
            FUNC_ARGUMENTS);
      };
    }
  } else {
    if (meta_->missing_type != MissingType::NaN) {
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
            FUNC_ARGUMENTS);
      };
    } else {
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
            FUNC_ARGUMENTS);
        // Same scan as the branch above, but the default direction is pinned to the right.
        output->default_left = false;
      };
    }
  }
  // Undefine every helper macro so none of them leaks into the rest of the translation unit.
#undef TEMPLATE_PREFIX
#undef LAMBDA_ARGUMENTS
#undef BEFORE_ARGUMENTS
#undef FUNC_ARGUMENTS
}
void
FuncForCategorical
()
{
...
...
@@ -227,41 +222,38 @@ class FeatureHistogram {
}
}
}
/*!
 * \brief Point find_best_threshold_fun_ at the FindBestThresholdCategoricalInner
 *        instantiation that matches the current regularisation settings.
 *
 * USE_RAND and USE_MC are forwarded unchanged from this function's own template
 * arguments. The third template flag is true when config->lambda_l1 > 0 and the
 * fourth is true when config->max_delta_step > 0 (presumably the USE_L1 and
 * USE_MAX_OUTPUT switches of the callee -- confirm against its declaration).
 */
template <bool USE_RAND, bool USE_MC>
void FuncForCategoricalL1() {
// The five call-time arguments of the bound member function are forwarded untouched.
#define ARGUMENTS                                                      \
  std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, \
      std::placeholders::_4, std::placeholders::_5
  if (meta_->config->lambda_l1 > 0) {
    if (meta_->config->max_delta_step > 0) {
      find_best_threshold_fun_ = std::bind(
          &FeatureHistogram::FindBestThresholdCategoricalInner<USE_RAND, USE_MC, true, true>,
          this, ARGUMENTS);
    } else {
      find_best_threshold_fun_ = std::bind(
          &FeatureHistogram::FindBestThresholdCategoricalInner<USE_RAND, USE_MC, true, false>,
          this, ARGUMENTS);
    }
  } else {
    if (meta_->config->max_delta_step > 0) {
      find_best_threshold_fun_ = std::bind(
          &FeatureHistogram::FindBestThresholdCategoricalInner<USE_RAND, USE_MC, false, true>,
          this, ARGUMENTS);
    } else {
      find_best_threshold_fun_ = std::bind(
          &FeatureHistogram::FindBestThresholdCategoricalInner<USE_RAND, USE_MC, false, false>,
          this, ARGUMENTS);
    }
  }
// Keep the helper macro local to this function.
#undef ARGUMENTS
}
template
<
bool
USE_RAND
,
bool
USE_MC
,
bool
USE_L1
,
bool
USE_MAX_OUTPUT
>
...
...
src/treelearner/gpu_tree_learner.cpp
View file @
bcad692e
...
...
@@ -13,7 +13,6 @@
#include <algorithm>
#include "../io/dense_bin.hpp"
#include "../io/dense_nbits_bin.hpp"
#define GPU_DEBUG 0
...
...
@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator
*
bin_iters
[
8
];
for
(
int
s_idx
=
0
;
s_idx
<
8
;
++
s_idx
)
{
bin_iters
[
s_idx
]
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
s_idx
])
==
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
s_idx
])
==
0
)
{
Log
::
Fatal
(
"GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not"
,
dense_ind
[
s_idx
]);
}
}
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
Dense
4bits
BinIterator
iters
[
8
]
=
{
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
0
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
1
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
2
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
3
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
4
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
5
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
6
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
7
])};
DenseBinIterator
<
uint8_t
,
true
>
iters
[
8
]
=
{
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
0
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
1
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
2
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
3
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
4
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
5
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
6
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
7
])};
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
0
]
=
(
uint8_t
)((
iters
[
0
].
RawGet
(
j
)
*
dev_bin_mult
[
0
]
+
((
j
+
0
)
&
(
dev_bin_mult
[
0
]
-
1
)))
|
((
iters
[
1
].
RawGet
(
j
)
*
dev_bin_mult
[
1
]
+
((
j
+
1
)
&
(
dev_bin_mult
[
1
]
-
1
)))
<<
4
));
...
...
@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
for
(
int
s_idx
=
0
;
s_idx
<
4
;
++
s_idx
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
// Dense bin
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
}
else
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
// Dense 4-bit bin
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
...
...
@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
for
(
int
i
=
0
;
i
<
k
;
++
i
)
{
if
(
dword_features_
==
8
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
...
...
@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
else
if
(
dword_features_
==
4
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
else
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
...
...
windows/LightGBM.vcxproj
View file @
bcad692e
...
...
@@ -263,7 +263,6 @@
<ClInclude
Include=
"..\src\boosting\rf.hpp"
/>
<ClInclude
Include=
"..\src\boosting\score_updater.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_sparse_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\parser.hpp"
/>
...
...
windows/LightGBM.vcxproj.filters
View file @
bcad692e
...
...
@@ -174,9 +174,6 @@
<ClInclude
Include=
"..\src\boosting\goss.hpp"
>
<Filter>
src\boosting
</Filter>
</ClInclude>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
>
<Filter>
src\io
</Filter>
</ClInclude>
<ClInclude
Include=
"..\include\LightGBM\utils\openmp_wrapper.h"
>
<Filter>
include\LightGBM\utils
</Filter>
</ClInclude>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment