Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
bcad692e
Unverified
Commit
bcad692e
authored
Mar 08, 2020
by
Guolin Ke
Committed by
GitHub
Mar 08, 2020
Browse files
Speed-up "Split" and some code refactorings (#2883)
* commit * fix msvc * fix format
parent
1a48fd26
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
703 additions
and
838 deletions
+703
-838
include/LightGBM/bin.h
include/LightGBM/bin.h
+25
-90
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+8
-5
include/LightGBM/feature_group.h
include/LightGBM/feature_group.h
+29
-14
include/LightGBM/meta.h
include/LightGBM/meta.h
+3
-0
src/io/bin.cpp
src/io/bin.cpp
+8
-8
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+311
-139
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+0
-354
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+237
-133
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+1
-1
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+61
-69
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+20
-21
windows/LightGBM.vcxproj
windows/LightGBM.vcxproj
+0
-1
windows/LightGBM.vcxproj.filters
windows/LightGBM.vcxproj.filters
+0
-3
No files found.
include/LightGBM/bin.h
View file @
bcad692e
...
@@ -218,61 +218,6 @@ class BinMapper {
...
@@ -218,61 +218,6 @@ class BinMapper {
uint32_t
most_freq_bin_
;
uint32_t
most_freq_bin_
;
};
};
/*!
* \brief Interface for ordered bin data. Efficient for constructing histograms, especially for sparse bins.
* There are 2 advantages to using an ordered bin:
* 1. It groups the data by leaf to improve the cache hit rate.
* 2. It only stores the non-zero bins, which can speed up histogram construction for sparse features.
* However, it brings an additional cost: the bins need to be re-ordered after every split, which is expensive for dense features.
* So we only use ordered bins for sparse situations.
*/
class
OrderedBin
{
public:
/*! \brief virtual destructor */
virtual
~
OrderedBin
()
{}
/*!
* \brief Initialization logic.
* \param used_indices If used_indices.size() == 0, all data is used; otherwise, used_indices[i] == true means the i-th data is used
(this logic was built for bagging)
* \param num_leaves Number of leaves on this iteration
*/
virtual
void
Init
(
const
char
*
used_indices
,
data_size_t
num_leaves
)
=
0
;
/*!
* \brief Construct the histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians,
* because it is hard to know the relative index within one leaf for a sparse bin, since zero bins are skipped.
* \param leaf The leaf whose data is used for construction
* \param gradients Gradients. Note: non-ordered by leaf
* \param hessians Hessians. Note: non-ordered by leaf
* \param out Output result
*/
virtual
void
ConstructHistogram
(
int
leaf
,
const
score_t
*
gradients
,
const
score_t
*
hessians
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Construct the histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians,
* because it is hard to know the relative index within one leaf for a sparse bin, since zero bins are skipped.
* \param leaf The leaf whose data is used for construction
* \param gradients Gradients. Note: non-ordered by leaf
* \param out Output result
*/
virtual
void
ConstructHistogram
(
int
leaf
,
const
score_t
*
gradients
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Split the current bin and re-order it by leaf
* \param leaf The leaf to split
* \param right_leaf The new leaf index after performing this split
* \param is_in_leaf is_in_leaf[i] == mark means the i-th data will be on the left leaf after the split
* \param mark is_in_leaf[i] == mark means the i-th data will be on the left leaf after the split
*/
virtual
void
Split
(
int
leaf
,
int
right_leaf
,
const
char
*
is_in_leaf
,
char
mark
)
=
0
;
virtual
data_size_t
NonZeroCount
(
int
leaf
)
const
=
0
;
};
/*! \brief Iterator for one bin column */
/*! \brief Iterator for one bin column */
class
BinIterator
{
class
BinIterator
{
public:
public:
...
@@ -382,43 +327,33 @@ class Bin {
...
@@ -382,43 +327,33 @@ class Bin {
virtual
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
virtual
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
=
0
;
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
=
0
;
/*!
* \brief Split data according to threshold: if bin <= threshold, the data is put into the left (lte_indices), otherwise into the right (gt_indices)
* \param min_bin min_bin of the currently used feature
* \param max_bin max_bin of the currently used feature
* \param default_bin default bin for feature value 0
* \param most_freq_bin
* \param missing_type missing type
* \param default_left if true, the missing bin will go to the left child
* \param threshold The split threshold.
* \param data_indices Used data indices. After this function is called, the less-than-or-equal data indices will be stored in this object.
* \param num_data Number of used data
* \param lte_indices After this function is called, the less-than-or-equal data indices will be stored in this object.
* \param gt_indices After this function is called, the greater-than data indices will be stored in this object.
* \return The number of less-than-or-equal data.
*/
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
MissingType
missing_type
,
bool
default_left
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
uint32_t
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
Split
(
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
virtual
data_size_t
SplitCategorical
(
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
/*!
* \brief Split data according to threshold: if bin <= threshold, the data is put into the left (lte_indices), otherwise into the right (gt_indices)
* \param min_bin min_bin of the currently used feature
* \param max_bin max_bin of the currently used feature
* \param most_freq_bin
* \param threshold The split threshold.
* \param num_threshold Number of thresholds
* \param data_indices Used data indices. After this function is called, the less-than-or-equal data indices will be stored in this object.
* \param num_data Number of used data
* \param lte_indices After this function is called, the less-than-or-equal data indices will be stored in this object.
* \param gt_indices After this function is called, the greater-than data indices will be stored in this object.
* \return The number of less-than-or-equal data.
*/
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
=
0
;
/*!
/*!
* \brief After all feature data has been pushed, calling this can better refactor the bin data
* \brief After all feature data has been pushed, calling this can better refactor the bin data
...
...
include/LightGBM/dataset.h
View file @
bcad692e
...
@@ -535,13 +535,16 @@ class Dataset {
...
@@ -535,13 +535,16 @@ class Dataset {
void
FixHistogram
(
int
feature_idx
,
double
sum_gradient
,
double
sum_hessian
,
hist_t
*
data
)
const
;
void
FixHistogram
(
int
feature_idx
,
double
sum_gradient
,
double
sum_hessian
,
hist_t
*
data
)
const
;
inline
data_size_t
Split
(
int
feature
,
inline
data_size_t
Split
(
int
feature
,
const
uint32_t
*
threshold
,
const
uint32_t
*
threshold
,
int
num_threshold
,
bool
default_left
,
int
num_threshold
,
bool
default_left
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
data_size_t
*
data_indices
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
const
int
group
=
feature2group_
[
feature
];
const
int
group
=
feature2group_
[
feature
];
const
int
sub_feature
=
feature2subfeature_
[
feature
];
const
int
sub_feature
=
feature2subfeature_
[
feature
];
return
feature_groups_
[
group
]
->
Split
(
sub_feature
,
threshold
,
num_threshold
,
default_left
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
feature_groups_
[
group
]
->
Split
(
sub_feature
,
threshold
,
num_threshold
,
default_left
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
inline
int
SubFeatureBinOffset
(
int
i
)
const
{
inline
int
SubFeatureBinOffset
(
int
i
)
const
{
...
...
include/LightGBM/feature_group.h
View file @
bcad692e
...
@@ -228,13 +228,11 @@ class FeatureGroup {
...
@@ -228,13 +228,11 @@ class FeatureGroup {
return
bin_data_
->
GetIterator
(
min_bin
,
max_bin
,
most_freq_bin
);
return
bin_data_
->
GetIterator
(
min_bin
,
max_bin
,
most_freq_bin
);
}
}
inline
data_size_t
Split
(
inline
data_size_t
Split
(
int
sub_feature
,
const
uint32_t
*
threshold
,
int
sub_feature
,
int
num_threshold
,
bool
default_left
,
const
uint32_t
*
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
int
num_threshold
,
data_size_t
*
lte_indices
,
bool
default_left
,
data_size_t
*
gt_indices
)
const
{
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
uint32_t
default_bin
=
bin_mappers_
[
sub_feature
]
->
GetDefaultBin
();
uint32_t
default_bin
=
bin_mappers_
[
sub_feature
]
->
GetDefaultBin
();
uint32_t
most_freq_bin
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
();
uint32_t
most_freq_bin
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
();
if
(
!
is_multi_val_
)
{
if
(
!
is_multi_val_
)
{
...
@@ -242,21 +240,38 @@ class FeatureGroup {
...
@@ -242,21 +240,38 @@ class FeatureGroup {
uint32_t
max_bin
=
bin_offsets_
[
sub_feature
+
1
]
-
1
;
uint32_t
max_bin
=
bin_offsets_
[
sub_feature
+
1
]
-
1
;
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
if
(
num_feature_
==
1
)
{
*
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
bin_data_
->
Split
(
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
else
{
}
else
{
return
bin_data_
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
if
(
num_feature_
==
1
)
{
return
bin_data_
->
SplitCategorical
(
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
return
bin_data_
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
}
}
else
{
}
else
{
int
addi
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
()
==
0
?
0
:
1
;
int
addi
=
bin_mappers_
[
sub_feature
]
->
GetMostFreqBin
()
==
0
?
0
:
1
;
uint32_t
min_bin
=
1
;
uint32_t
max_bin
=
bin_mappers_
[
sub_feature
]
->
num_bin
()
-
1
+
addi
;
uint32_t
max_bin
=
bin_mappers_
[
sub_feature
]
->
num_bin
()
-
1
+
addi
;
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
return
multi_bin_data_
[
sub_feature
]
->
Split
(
min_bin
,
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
return
multi_bin_data_
[
sub_feature
]
->
Split
(
*
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
max_bin
,
default_bin
,
most_freq_bin
,
missing_type
,
default_left
,
*
threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
else
{
}
else
{
return
multi_bin_data_
[
sub_feature
]
->
SplitCategorical
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
return
multi_bin_data_
[
sub_feature
]
->
SplitCategorical
(
max_bin
,
most_freq_bin
,
threshold
,
num_threshold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
}
}
}
}
}
...
...
include/LightGBM/meta.h
View file @
bcad692e
...
@@ -75,6 +75,9 @@ const int kAlignedSize = 32;
...
@@ -75,6 +75,9 @@ const int kAlignedSize = 32;
#define SIZE_ALIGNED(t) ((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize
#define SIZE_ALIGNED(t) ((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize
// Refer to https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=vs-2019
#pragma warning(disable : 4127)
}
// namespace LightGBM
}
// namespace LightGBM
#endif // LightGBM_META_H_
#endif // LightGBM_META_H_
src/io/bin.cpp
View file @
bcad692e
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#include <cstring>
#include <cstring>
#include "dense_bin.hpp"
#include "dense_bin.hpp"
#include "dense_nbits_bin.hpp"
#include "multi_val_dense_bin.hpp"
#include "multi_val_dense_bin.hpp"
#include "multi_val_sparse_bin.hpp"
#include "multi_val_sparse_bin.hpp"
#include "sparse_bin.hpp"
#include "sparse_bin.hpp"
...
@@ -633,9 +632,10 @@ namespace LightGBM {
...
@@ -633,9 +632,10 @@ namespace LightGBM {
return
ret
;
return
ret
;
}
}
template
class
DenseBin
<
uint8_t
>;
template
class
DenseBin
<
uint8_t
,
true
>;
template
class
DenseBin
<
uint16_t
>;
template
class
DenseBin
<
uint8_t
,
false
>;
template
class
DenseBin
<
uint32_t
>;
template
class
DenseBin
<
uint16_t
,
false
>;
template
class
DenseBin
<
uint32_t
,
false
>;
template
class
SparseBin
<
uint8_t
>;
template
class
SparseBin
<
uint8_t
>;
template
class
SparseBin
<
uint16_t
>;
template
class
SparseBin
<
uint16_t
>;
...
@@ -647,13 +647,13 @@ namespace LightGBM {
...
@@ -647,13 +647,13 @@ namespace LightGBM {
Bin
*
Bin
::
CreateDenseBin
(
data_size_t
num_data
,
int
num_bin
)
{
Bin
*
Bin
::
CreateDenseBin
(
data_size_t
num_data
,
int
num_bin
)
{
if
(
num_bin
<=
16
)
{
if
(
num_bin
<=
16
)
{
return
new
Dense
4bitsBin
(
num_data
);
return
new
Dense
Bin
<
uint8_t
,
true
>
(
num_data
);
}
else
if
(
num_bin
<=
256
)
{
}
else
if
(
num_bin
<=
256
)
{
return
new
DenseBin
<
uint8_t
>
(
num_data
);
return
new
DenseBin
<
uint8_t
,
false
>
(
num_data
);
}
else
if
(
num_bin
<=
65536
)
{
}
else
if
(
num_bin
<=
65536
)
{
return
new
DenseBin
<
uint16_t
>
(
num_data
);
return
new
DenseBin
<
uint16_t
,
false
>
(
num_data
);
}
else
{
}
else
{
return
new
DenseBin
<
uint32_t
>
(
num_data
);
return
new
DenseBin
<
uint32_t
,
false
>
(
num_data
);
}
}
}
}
...
...
src/io/dense_bin.hpp
View file @
bcad692e
/*!
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
*/
#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
#define LIGHTGBM_IO_DENSE_BIN_HPP_
#define LIGHTGBM_IO_DENSE_BIN_HPP_
...
@@ -13,16 +14,19 @@
...
@@ -13,16 +14,19 @@
namespace
LightGBM
{
namespace
LightGBM
{
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
class
DenseBin
;
class
DenseBin
;
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
class
DenseBinIterator
:
public
BinIterator
{
class
DenseBinIterator
:
public
BinIterator
{
public:
public:
explicit
DenseBinIterator
(
const
DenseBin
<
VAL_T
>*
bin_data
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
explicit
DenseBinIterator
(
const
DenseBin
<
VAL_T
,
IS_4BIT
>*
bin_data
,
:
bin_data_
(
bin_data
),
min_bin_
(
static_cast
<
VAL_T
>
(
min_bin
)),
uint32_t
min_bin
,
uint32_t
max_bin
,
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
uint32_t
most_freq_bin
)
most_freq_bin_
(
static_cast
<
VAL_T
>
(
most_freq_bin
))
{
:
bin_data_
(
bin_data
),
min_bin_
(
static_cast
<
VAL_T
>
(
min_bin
)),
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
most_freq_bin_
(
static_cast
<
VAL_T
>
(
most_freq_bin
))
{
if
(
most_freq_bin_
==
0
)
{
if
(
most_freq_bin_
==
0
)
{
offset_
=
1
;
offset_
=
1
;
}
else
{
}
else
{
...
@@ -34,43 +38,68 @@ class DenseBinIterator: public BinIterator {
...
@@ -34,43 +38,68 @@ class DenseBinIterator: public BinIterator {
inline
void
Reset
(
data_size_t
)
override
{}
inline
void
Reset
(
data_size_t
)
override
{}
private:
private:
const
DenseBin
<
VAL_T
>*
bin_data_
;
const
DenseBin
<
VAL_T
,
IS_4BIT
>*
bin_data_
;
VAL_T
min_bin_
;
VAL_T
min_bin_
;
VAL_T
max_bin_
;
VAL_T
max_bin_
;
VAL_T
most_freq_bin_
;
VAL_T
most_freq_bin_
;
uint8_t
offset_
;
uint8_t
offset_
;
};
};
/*!
/*!
* \brief Used to store bins for dense feature
* \brief Used to store bins for dense feature
* Use template to reduce memory cost
* Use template to reduce memory cost
*/
*/
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
class
DenseBin
:
public
Bin
{
class
DenseBin
:
public
Bin
{
public:
public:
friend
DenseBinIterator
<
VAL_T
>
;
friend
DenseBinIterator
<
VAL_T
,
IS_4BIT
>
;
explicit
DenseBin
(
data_size_t
num_data
)
explicit
DenseBin
(
data_size_t
num_data
)
:
num_data_
(
num_data
),
data_
(
num_data_
,
static_cast
<
VAL_T
>
(
0
))
{
:
num_data_
(
num_data
)
{
if
(
IS_4BIT
)
{
CHECK_EQ
(
sizeof
(
VAL_T
),
1
);
data_
.
resize
((
num_data_
+
1
)
/
2
,
static_cast
<
uint8_t
>
(
0
));
buf_
.
resize
((
num_data_
+
1
)
/
2
,
static_cast
<
uint8_t
>
(
0
));
}
else
{
data_
.
resize
(
num_data_
,
static_cast
<
VAL_T
>
(
0
));
}
}
}
~
DenseBin
()
{
~
DenseBin
()
{}
}
void
Push
(
int
,
data_size_t
idx
,
uint32_t
value
)
override
{
void
Push
(
int
,
data_size_t
idx
,
uint32_t
value
)
override
{
data_
[
idx
]
=
static_cast
<
VAL_T
>
(
value
);
if
(
IS_4BIT
)
{
const
int
i1
=
idx
>>
1
;
const
int
i2
=
(
idx
&
1
)
<<
2
;
const
uint8_t
val
=
static_cast
<
uint8_t
>
(
value
)
<<
i2
;
if
(
i2
==
0
)
{
data_
[
i1
]
=
val
;
}
else
{
buf_
[
i1
]
=
val
;
}
}
else
{
data_
[
idx
]
=
static_cast
<
VAL_T
>
(
value
);
}
}
}
void
ReSize
(
data_size_t
num_data
)
override
{
void
ReSize
(
data_size_t
num_data
)
override
{
if
(
num_data_
!=
num_data
)
{
if
(
num_data_
!=
num_data
)
{
num_data_
=
num_data
;
num_data_
=
num_data
;
data_
.
resize
(
num_data_
);
if
(
IS_4BIT
)
{
data_
.
resize
((
num_data_
+
1
)
/
2
,
static_cast
<
VAL_T
>
(
0
));
}
else
{
data_
.
resize
(
num_data_
);
}
}
}
}
}
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
template
<
bool
USE_INDICES
,
bool
USE_PREFETCH
,
bool
USE_HESSIAN
>
template
<
bool
USE_INDICES
,
bool
USE_PREFETCH
,
bool
USE_HESSIAN
>
void
ConstructHistogramInner
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogramInner
(
const
data_size_t
*
data_indices
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
{
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
{
data_size_t
i
=
start
;
data_size_t
i
=
start
;
hist_t
*
grad
=
out
;
hist_t
*
grad
=
out
;
hist_t
*
hess
=
out
+
1
;
hist_t
*
hess
=
out
+
1
;
...
@@ -80,9 +109,14 @@ class DenseBin: public Bin {
...
@@ -80,9 +109,14 @@ class DenseBin: public Bin {
const
data_size_t
pf_end
=
end
-
pf_offset
;
const
data_size_t
pf_end
=
end
-
pf_offset
;
for
(;
i
<
pf_end
;
++
i
)
{
for
(;
i
<
pf_end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
pf_idx
=
USE_INDICES
?
data_indices
[
i
+
pf_offset
]
:
i
+
pf_offset
;
const
auto
pf_idx
=
PREFETCH_T0
(
data_
.
data
()
+
pf_idx
);
USE_INDICES
?
data_indices
[
i
+
pf_offset
]
:
i
+
pf_offset
;
const
auto
ti
=
static_cast
<
uint32_t
>
(
data_
[
idx
])
<<
1
;
if
(
IS_4BIT
)
{
PREFETCH_T0
(
data_
.
data
()
+
(
pf_idx
>>
1
));
}
else
{
PREFETCH_T0
(
data_
.
data
()
+
pf_idx
);
}
const
auto
ti
=
static_cast
<
uint32_t
>
(
data
(
idx
))
<<
1
;
if
(
USE_HESSIAN
)
{
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
...
@@ -94,7 +128,7 @@ class DenseBin: public Bin {
...
@@ -94,7 +128,7 @@ class DenseBin: public Bin {
}
}
for
(;
i
<
end
;
++
i
)
{
for
(;
i
<
end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
ti
=
static_cast
<
uint32_t
>
(
data
_
[
idx
]
)
<<
1
;
const
auto
ti
=
static_cast
<
uint32_t
>
(
data
(
idx
)
)
<<
1
;
if
(
USE_HESSIAN
)
{
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
...
@@ -105,45 +139,53 @@ class DenseBin: public Bin {
...
@@ -105,45 +139,53 @@ class DenseBin: public Bin {
}
}
}
}
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
const
score_t
*
ordered_hessians
,
ConstructHistogramInner
<
true
,
true
,
true
>
(
data_indices
,
start
,
end
,
ordered_gradients
,
ordered_hessians
,
out
);
hist_t
*
out
)
const
override
{
ConstructHistogramInner
<
true
,
true
,
true
>
(
data_indices
,
start
,
end
,
ordered_gradients
,
ordered_hessians
,
out
);
}
}
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
const
score_t
*
ordered_hessians
,
ConstructHistogramInner
<
false
,
false
,
true
>
(
nullptr
,
start
,
end
,
ordered_gradients
,
ordered_hessians
,
out
);
hist_t
*
out
)
const
override
{
ConstructHistogramInner
<
false
,
false
,
true
>
(
nullptr
,
start
,
end
,
ordered_gradients
,
ordered_hessians
,
out
);
}
}
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
const
score_t
*
ordered_gradients
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
hist_t
*
out
)
const
override
{
ConstructHistogramInner
<
true
,
true
,
false
>
(
data_indices
,
start
,
end
,
ordered_gradients
,
nullptr
,
out
);
ConstructHistogramInner
<
true
,
true
,
false
>
(
data_indices
,
start
,
end
,
ordered_gradients
,
nullptr
,
out
);
}
}
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
hist_t
*
out
)
const
override
{
ConstructHistogramInner
<
false
,
false
,
false
>
(
nullptr
,
start
,
end
,
ordered_gradients
,
nullptr
,
out
);
ConstructHistogramInner
<
false
,
false
,
false
>
(
nullptr
,
start
,
end
,
ordered_gradients
,
nullptr
,
out
);
}
}
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
template
<
bool
MISS_IS_ZERO
,
bool
MISS_IS_NA
,
bool
MFB_IS_ZERO
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
bool
MFB_IS_NA
,
bool
USE_MIN_BIN
>
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
data_size_t
SplitInner
(
uint32_t
min_bin
,
uint32_t
max_bin
,
if
(
num_data
<=
0
)
{
return
0
;
}
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
bool
default_left
,
uint32_t
threshold
,
const
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
const
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
data_size_t
*
lte_indices
,
VAL_T
t_zero_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
data_size_t
*
gt_indices
)
const
{
VAL_T
t_most_freq_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
most_freq_bin
);
auto
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
auto
t_zero_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
if
(
most_freq_bin
==
0
)
{
if
(
most_freq_bin
==
0
)
{
th
-=
1
;
--
th
;
t_zero_bin
-=
1
;
--
t_zero_bin
;
t_most_freq_bin
-=
1
;
}
}
const
auto
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
const
auto
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
data_size_t
lte_count
=
0
;
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
...
@@ -154,68 +196,55 @@ class DenseBin: public Bin {
...
@@ -154,68 +196,55 @@ class DenseBin: public Bin {
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
if
(
missing_type
==
MissingType
::
NaN
)
{
if
(
MISS_IS_ZERO
||
MISS_IS_NA
)
{
if
(
default_left
)
{
if
(
default_left
)
{
missing_default_indices
=
lte_indices
;
missing_default_indices
=
lte_indices
;
missing_default_count
=
&
lte_count
;
missing_default_count
=
&
lte_count
;
}
}
if
(
t_most_freq_bin
==
maxb
)
{
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
if
(
min_bin
<
max_bin
)
{
const
data_size_t
idx
=
data_indices
[
i
];
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
const
VAL_T
bin
=
data_
[
idx
];
const
data_size_t
idx
=
data_indices
[
i
];
if
(
t_most_freq_bin
==
bin
||
bin
<
minb
||
bin
>
maxb
)
{
const
auto
bin
=
data
(
idx
);
if
((
MISS_IS_ZERO
&&
!
MFB_IS_ZERO
&&
bin
==
t_zero_bin
)
||
(
MISS_IS_NA
&&
!
MFB_IS_NA
&&
bin
==
maxb
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
((
USE_MIN_BIN
&&
(
bin
<
minb
||
bin
>
maxb
))
||
(
!
USE_MIN_BIN
&&
bin
==
0
))
{
if
((
MISS_IS_NA
&&
MFB_IS_NA
)
||
(
MISS_IS_ZERO
&&
MFB_IS_ZERO
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
VAL_T
bin
=
data_
[
idx
];
if
(
bin
==
maxb
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
else
{
}
else
{
if
((
default_left
&&
missing_type
==
MissingType
::
Zero
)
data_size_t
*
max_bin_indices
=
gt_indices
;
||
(
default_bin
<=
threshold
&&
missing_type
!=
MissingType
::
Zero
))
{
data_size_t
*
max_bin_count
=
&
gt_count
;
missing_default_indices
=
lte_indices
;
if
(
maxb
<=
th
)
{
missing_default_count
=
&
lte_count
;
max_bin_indices
=
lte_indices
;
max_bin_count
=
&
lte_count
;
}
}
if
(
default_bin
==
most_freq_bin
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
data
(
idx
);
const
VAL_T
bin
=
data_
[
idx
];
if
(
MISS_IS_ZERO
&&
!
MFB_IS_ZERO
&&
bin
==
t_zero_bin
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
!=
maxb
)
{
if
((
MISS_IS_NA
&&
MFB_IS_NA
)
||
(
MISS_IS_ZERO
&&
MFB_IS_ZERO
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lt
e
_indices
[
lt
e
_count
++
]
=
idx
;
defau
lt_indices
[
(
*
defau
lt_count
)
++
]
=
idx
;
}
}
}
}
else
{
}
else
{
if
(
MISS_IS_NA
&&
!
MFB_IS_NA
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
VAL_T
bin
=
data_
[
idx
];
if
(
bin
==
t_zero_bin
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte
_indices
[
lte
_count
++
]
=
idx
;
max_bin
_indices
[
(
*
max_bin
_count
)
++
]
=
idx
;
}
}
}
}
}
}
...
@@ -223,11 +252,68 @@ class DenseBin: public Bin {
...
@@ -223,11 +252,68 @@ class DenseBin: public Bin {
return
lte_count
;
return
lte_count
;
}
}
data_size_t
SplitCategorical
(
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
const
uint32_t
*
threshold
,
int
num_threahold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
#define ARGUMENTS \
min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if
(
missing_type
==
MissingType
::
None
)
{
return
SplitInner
<
false
,
false
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
else
if
(
missing_type
==
MissingType
::
Zero
)
{
if
(
default_bin
==
most_freq_bin
)
{
return
SplitInner
<
true
,
false
,
true
,
false
,
true
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
true
,
false
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
}
else
{
if
(
max_bin
==
most_freq_bin
+
min_bin
&&
most_freq_bin
>
0
)
{
return
SplitInner
<
false
,
true
,
false
,
true
,
true
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
false
,
true
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
}
#undef ARGUMENTS
}
/*!
 * \brief Numerical split overload that takes no min_bin argument.
 *
 * Forwards to SplitInner with the literal 1 as its first argument and with
 * the last template flag set to false. The remaining template flags are
 * picked from missing_type, default_bin, most_freq_bin and max_bin.
 * NOTE(review): the flag order is presumably
 * <MISS_IS_ZERO, MISS_IS_NA, MFB_IS_ZERO, MFB_IS_NA, USE_MIN_BIN>, as in
 * SparseBin::SplitInner - confirm against DenseBin::SplitInner.
 *
 * \return Whatever the selected SplitInner instantiation returns.
 */
data_size_t Split(uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  const data_size_t* data_indices, data_size_t cnt,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
// Argument list shared by every SplitInner call below.
#define ARGUMENTS                                                  \
  1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
      data_indices, cnt, lte_indices, gt_indices
  switch (missing_type) {
    case MissingType::None:
      return SplitInner<false, false, false, false, false>(ARGUMENTS);
    case MissingType::Zero: {
      const bool default_is_most_freq = (default_bin == most_freq_bin);
      if (default_is_most_freq) {
        return SplitInner<true, false, true, false, false>(ARGUMENTS);
      }
      return SplitInner<true, false, false, false, false>(ARGUMENTS);
    }
    default: {
      // Every other missing type (the original plain `else` branch).
      const bool last_bin_is_most_freq =
          (max_bin == most_freq_bin + 1) && (most_freq_bin > 0);
      if (last_bin_is_most_freq) {
        return SplitInner<false, true, false, true, false>(ARGUMENTS);
      }
      return SplitInner<false, true, false, false, false>(ARGUMENTS);
    }
  }
#undef ARGUMENTS
}
template
<
bool
USE_MIN_BIN
>
data_size_t
SplitCategoricalInner
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threahold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
data_size_t
lte_count
=
0
;
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
...
@@ -236,12 +322,15 @@ class DenseBin: public Bin {
...
@@ -236,12 +322,15 @@ class DenseBin: public Bin {
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
uint32_t
bin
=
data
_
[
idx
]
;
const
uint32_t
bin
=
data
(
idx
)
;
if
(
bin
<
min_bin
||
bin
>
max_bin
)
{
if
(
USE_MIN_BIN
&&
(
bin
<
min_bin
||
bin
>
max_bin
)
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
bin
-
min_bin
))
{
}
else
if
(
!
USE_MIN_BIN
&&
bin
==
0
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
bin
-
min_bin
))
{
lte_indices
[
lte_count
++
]
=
idx
;
lte_indices
[
lte_count
++
]
=
idx
;
}
else
{
}
else
{
gt_indices
[
gt_count
++
]
=
idx
;
gt_indices
[
gt_count
++
]
=
idx
;
...
@@ -250,57 +339,138 @@ class DenseBin: public Bin {
...
@@ -250,57 +339,138 @@ class DenseBin: public Bin {
return
lte_count
;
return
lte_count
;
}
}
/*!
 * \brief Categorical split overload that receives an explicit min_bin.
 *
 * Pure forwarder: every argument is handed unchanged to
 * SplitCategoricalInner instantiated with USE_MIN_BIN = true.
 *
 * \return The value returned by SplitCategoricalInner.
 */
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                             uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threahold,
                             const data_size_t* data_indices,
                             data_size_t cnt, data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  constexpr bool kUseMinBin = true;
  return SplitCategoricalInner<kUseMinBin>(
      min_bin, max_bin, most_freq_bin, threshold, num_threahold,
      data_indices, cnt, lte_indices, gt_indices);
}
/*!
 * \brief Categorical split overload that takes no min_bin argument.
 *
 * Forwards to SplitCategoricalInner instantiated with USE_MIN_BIN = false,
 * passing the literal 1 in the min_bin position.
 *
 * \return The value returned by SplitCategoricalInner.
 */
data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threahold,
                             const data_size_t* data_indices,
                             data_size_t cnt, data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  constexpr bool kUseMinBin = false;
  return SplitCategoricalInner<kUseMinBin>(
      1, max_bin, most_freq_bin, threshold, num_threahold, data_indices,
      cnt, lte_indices, gt_indices);
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
void
FinishLoad
()
override
{}
/*!
 * \brief Merge the side buffer into the main storage (4-bit layout only).
 *
 * Does nothing unless IS_4BIT is set and buf_ is non-empty. Otherwise ORs
 * the first (num_data_ + 1) / 2 bytes of buf_ into the same positions of
 * data_ and then clears buf_.
 */
void FinishLoad() override {
  if (!IS_4BIT || buf_.empty()) {
    return;
  }
  const int num_bytes = (num_data_ + 1) / 2;
  for (int pos = 0; pos < num_bytes; ++pos) {
    data_[pos] |= buf_[pos];
  }
  buf_.clear();
}
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
const
VAL_T
*
mem_data
=
reinterpret_cast
<
const
VAL_T
*>
(
memory
);
const
VAL_T
*
mem_data
=
reinterpret_cast
<
const
VAL_T
*>
(
memory
);
if
(
!
local_used_indices
.
empty
())
{
if
(
!
local_used_indices
.
empty
())
{
for
(
int
i
=
0
;
i
<
num_data_
;
++
i
)
{
if
(
IS_4BIT
)
{
data_
[
i
]
=
mem_data
[
local_used_indices
[
i
]];
const
data_size_t
rest
=
num_data_
&
1
;
for
(
int
i
=
0
;
i
<
num_data_
-
rest
;
i
+=
2
)
{
// get old bins
data_size_t
idx
=
local_used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
(
(
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
local_used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
(
(
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
// add
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
local_used_indices
[
num_data_
-
1
];
data_
[
num_data_
>>
1
]
=
(
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
else
{
for
(
int
i
=
0
;
i
<
num_data_
;
++
i
)
{
data_
[
i
]
=
mem_data
[
local_used_indices
[
i
]];
}
}
}
}
else
{
}
else
{
for
(
in
t
i
=
0
;
i
<
num_
data_
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
data_
.
size
()
;
++
i
)
{
data_
[
i
]
=
mem_data
[
i
];
data_
[
i
]
=
mem_data
[
i
];
}
}
}
}
}
}
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
inline
VAL_T
data
(
data_size_t
idx
)
const
{
auto
other_bin
=
dynamic_cast
<
const
DenseBin
<
VAL_T
>*>
(
full_bin
);
if
(
IS_4BIT
)
{
for
(
int
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
return
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
data_
[
i
]
=
other_bin
->
data_
[
used_indices
[
i
]];
}
else
{
return
data_
[
idx
];
}
}
}
}
void
SaveBinaryToFile
(
const
VirtualFileWriter
*
writer
)
const
override
{
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
writer
->
Write
(
data_
.
data
(),
sizeof
(
VAL_T
)
*
num_data_
);
data_size_t
num_used_indices
)
override
{
auto
other_bin
=
dynamic_cast
<
const
DenseBin
<
VAL_T
,
IS_4BIT
>*>
(
full_bin
);
if
(
IS_4BIT
)
{
const
data_size_t
rest
=
num_used_indices
&
1
;
for
(
int
i
=
0
;
i
<
num_used_indices
-
rest
;
i
+=
2
)
{
data_size_t
idx
=
used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
(
(
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
(
(
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
used_indices
[
num_used_indices
-
1
];
data_
[
num_used_indices
>>
1
]
=
(
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
else
{
for
(
int
i
=
0
;
i
<
num_used_indices
;
++
i
)
{
data_
[
i
]
=
other_bin
->
data_
[
used_indices
[
i
]];
}
}
}
}
size_t
SizesInByte
(
)
const
override
{
void
SaveBinaryToFile
(
const
VirtualFileWriter
*
writer
)
const
override
{
return
sizeof
(
VAL_T
)
*
num_
data_
;
writer
->
Write
(
data_
.
data
(),
sizeof
(
VAL_T
)
*
data_
.
size
())
;
}
}
DenseBin
<
VAL_T
>*
Clone
()
override
;
size_t
SizesInByte
()
const
override
{
return
sizeof
(
VAL_T
)
*
data_
.
size
();
}
DenseBin
<
VAL_T
,
IS_4BIT
>*
Clone
()
override
;
private:
private:
data_size_t
num_data_
;
data_size_t
num_data_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
kAlignedSize
>>
data_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
kAlignedSize
>>
data_
;
std
::
vector
<
uint8_t
>
buf_
;
DenseBin
<
VAL_T
>
(
const
DenseBin
<
VAL_T
>&
other
)
DenseBin
<
VAL_T
,
IS_4BIT
>
(
const
DenseBin
<
VAL_T
,
IS_4BIT
>&
other
)
:
num_data_
(
other
.
num_data_
),
data_
(
other
.
data_
)
{
:
num_data_
(
other
.
num_data_
),
data_
(
other
.
data_
)
{}
}
};
};
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
DenseBin
<
VAL_T
>*
DenseBin
<
VAL_T
>::
Clone
()
{
DenseBin
<
VAL_T
,
IS_4BIT
>*
DenseBin
<
VAL_T
,
IS_4BIT
>::
Clone
()
{
return
new
DenseBin
<
VAL_T
>
(
*
this
);
return
new
DenseBin
<
VAL_T
,
IS_4BIT
>
(
*
this
);
}
}
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
uint32_t
DenseBinIterator
<
VAL_T
>::
Get
(
data_size_t
idx
)
{
uint32_t
DenseBinIterator
<
VAL_T
,
IS_4BIT
>::
Get
(
data_size_t
idx
)
{
auto
ret
=
bin_data_
->
data
_
[
idx
]
;
auto
ret
=
bin_data_
->
data
(
idx
)
;
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
return
ret
-
min_bin_
+
offset_
;
return
ret
-
min_bin_
+
offset_
;
}
else
{
}
else
{
...
@@ -308,15 +478,17 @@ uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
...
@@ -308,15 +478,17 @@ uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
}
}
}
}
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
inline
uint32_t
DenseBinIterator
<
VAL_T
>::
RawGet
(
data_size_t
idx
)
{
inline
uint32_t
DenseBinIterator
<
VAL_T
,
IS_4BIT
>::
RawGet
(
data_size_t
idx
)
{
return
bin_data_
->
data
_
[
idx
]
;
return
bin_data_
->
data
(
idx
)
;
}
}
template
<
typename
VAL_T
>
template
<
typename
VAL_T
,
bool
IS_4BIT
>
BinIterator
*
DenseBin
<
VAL_T
>::
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
{
BinIterator
*
DenseBin
<
VAL_T
,
IS_4BIT
>::
GetIterator
(
return
new
DenseBinIterator
<
VAL_T
>
(
this
,
min_bin
,
max_bin
,
most_freq_bin
);
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
{
return
new
DenseBinIterator
<
VAL_T
,
IS_4BIT
>
(
this
,
min_bin
,
max_bin
,
most_freq_bin
);
}
}
}
// namespace LightGBM
}
// namespace LightGBM
#endif
// LightGBM_IO_DENSE_BIN_HPP_
#endif // LightGBM_IO_DENSE_BIN_HPP_
src/io/dense_nbits_bin.hpp
deleted
100644 → 0
View file @
1a48fd26
/*!
* Copyright (c) 2017 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#include <LightGBM/bin.h>
#include <cstdint>
#include <cstring>
#include <vector>
namespace
LightGBM
{
class
Dense4bitsBin
;
/*!
 * \brief Iterator over a Dense4bitsBin.
 *
 * Stores the 8-bit truncations of min_bin, max_bin and most_freq_bin, plus
 * an offset that is 1 when the truncated most_freq_bin is 0 and 0 otherwise.
 * Get() and RawGet() are defined after Dense4bitsBin.
 */
class Dense4bitsBinIterator : public BinIterator {
 public:
  explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data,
                                 uint32_t min_bin, uint32_t max_bin,
                                 uint32_t most_freq_bin)
      : bin_data_(bin_data),
        min_bin_(static_cast<uint8_t>(min_bin)),
        max_bin_(static_cast<uint8_t>(max_bin)),
        most_freq_bin_(static_cast<uint8_t>(most_freq_bin)),
        // Members are initialised in declaration order, so most_freq_bin_
        // is already set when offset_ is computed.
        offset_(static_cast<uint8_t>(most_freq_bin_ == 0 ? 1 : 0)) {}

  inline uint32_t RawGet(data_size_t idx) override;
  inline uint32_t Get(data_size_t idx) override;
  /*! \brief Intentionally empty: this iterator has nothing to reset. */
  inline void Reset(data_size_t) override {}

 private:
  const Dense4bitsBin* bin_data_;
  uint8_t min_bin_;
  uint8_t max_bin_;
  uint8_t most_freq_bin_;
  uint8_t offset_;
};
class
Dense4bitsBin
:
public
Bin
{
public:
friend
Dense4bitsBinIterator
;
/*!
 * \brief Allocate zero-filled storage for num_data 4-bit values.
 *
 * Both data_ and buf_ receive (num_data + 1) / 2 bytes, i.e. one byte for
 * every two values.
 */
explicit Dense4bitsBin(data_size_t num_data) : num_data_(num_data) {
  const int num_bytes = (num_data_ + 1) / 2;
  const uint8_t zero = 0;
  data_.resize(num_bytes, zero);
  buf_.assign(num_bytes, zero);
}
/*! \brief Nothing to release by hand; the vector members clean up themselves. */
~Dense4bitsBin() = default;
/*!
 * \brief Store one value for row idx (the first argument is unused).
 *
 * Even idx: the value, truncated to 8 bits, overwrites data_[idx >> 1].
 * Odd idx: the value shifted left by 4 (then truncated to 8 bits) overwrites
 * buf_[idx >> 1]; FinishLoad() later ORs buf_ into data_.
 */
void Push(int, data_size_t idx, uint32_t value) override {
  const int byte_pos = idx >> 1;
  const bool is_odd_row = (idx & 1) != 0;
  if (is_odd_row) {
    buf_[byte_pos] =
        static_cast<uint8_t>(static_cast<uint8_t>(value) << 4);
  } else {
    data_[byte_pos] = static_cast<uint8_t>(value);
  }
}
/*!
 * \brief Change the number of rows held by this bin.
 *
 * data_ is resized to (num_data + 1) / 2 bytes. If buf_ still holds pending
 * values (it is non-empty until FinishLoad() clears it) it is resized to the
 * same length, zero-filling any new bytes. Without this, growing the bin
 * before FinishLoad() left buf_ shorter than data_, while Push() writes
 * buf_[idx >> 1] and FinishLoad() reads buf_[i] for every
 * i < (num_data_ + 1) / 2 - both out of bounds.
 * An already-empty buf_ is left empty so that no memory is re-allocated
 * after loading has finished.
 *
 * \param num_data New number of rows; a no-op when equal to the current one.
 */
void ReSize(data_size_t num_data) override {
  if (num_data_ == num_data) {
    return;
  }
  num_data_ = num_data;
  const int len = (num_data_ + 1) / 2;
  data_.resize(len);
  if (!buf_.empty()) {
    // Keep the pending odd-row buffer the same length as data_.
    buf_.resize(len, static_cast<uint8_t>(0));
  }
}
inline
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
template
<
bool
USE_INDICES
,
bool
USE_PREFETCH
,
bool
USE_HESSIAN
>
void
ConstructHistogramInner
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
{
data_size_t
i
=
start
;
hist_t
*
grad
=
out
;
hist_t
*
hess
=
out
+
1
;
hist_cnt_t
*
cnt
=
reinterpret_cast
<
hist_cnt_t
*>
(
hess
);
if
(
USE_PREFETCH
)
{
const
data_size_t
pf_offset
=
64
;
const
data_size_t
pf_end
=
end
-
pf_offset
;
for
(;
i
<
pf_end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
auto
pf_idx
=
USE_INDICES
?
data_indices
[
i
+
pf_offset
]
:
i
+
pf_offset
;
PREFETCH_T0
(
data_
.
data
()
+
(
pf_idx
>>
1
));
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
uint8_t
ti
=
static_cast
<
uint8_t
>
(
bin
)
<<
1
;
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
}
else
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
++
cnt
[
ti
];
}
}
}
for
(;
i
<
end
;
++
i
)
{
const
auto
idx
=
USE_INDICES
?
data_indices
[
i
]
:
i
;
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
uint8_t
ti
=
static_cast
<
uint8_t
>
(
bin
)
<<
1
;
if
(
USE_HESSIAN
)
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
hess
[
ti
]
+=
ordered_hessians
[
i
];
}
else
{
grad
[
ti
]
+=
ordered_gradients
[
i
];
++
cnt
[
ti
];
}
}
}
/*!
 * \brief Indexed rows, gradients and hessians.
 *        Forwards to ConstructHistogramInner<true, true, true>.
 */
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                        data_size_t end, const score_t* ordered_gradients,
                        const score_t* ordered_hessians,
                        hist_t* out) const override {
  constexpr bool kUseIndices = true;
  constexpr bool kUsePrefetch = true;
  constexpr bool kUseHessian = true;
  ConstructHistogramInner<kUseIndices, kUsePrefetch, kUseHessian>(
      data_indices, start, end, ordered_gradients, ordered_hessians, out);
}

/*!
 * \brief Contiguous rows, gradients and hessians.
 *        Forwards to ConstructHistogramInner<false, false, true> with a
 *        null index pointer.
 */
void ConstructHistogram(data_size_t start, data_size_t end,
                        const score_t* ordered_gradients,
                        const score_t* ordered_hessians,
                        hist_t* out) const override {
  constexpr bool kUseIndices = false;
  constexpr bool kUsePrefetch = false;
  constexpr bool kUseHessian = true;
  ConstructHistogramInner<kUseIndices, kUsePrefetch, kUseHessian>(
      nullptr, start, end, ordered_gradients, ordered_hessians, out);
}

/*!
 * \brief Indexed rows, gradients only.
 *        Forwards to ConstructHistogramInner<true, true, false> with a null
 *        hessian pointer.
 */
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                        data_size_t end, const score_t* ordered_gradients,
                        hist_t* out) const override {
  constexpr bool kUseIndices = true;
  constexpr bool kUsePrefetch = true;
  constexpr bool kUseHessian = false;
  ConstructHistogramInner<kUseIndices, kUsePrefetch, kUseHessian>(
      data_indices, start, end, ordered_gradients, nullptr, out);
}

/*!
 * \brief Contiguous rows, gradients only.
 *        Forwards to ConstructHistogramInner<false, false, false> with null
 *        index and hessian pointers.
 */
void ConstructHistogram(data_size_t start, data_size_t end,
                        const score_t* ordered_gradients,
                        hist_t* out) const override {
  constexpr bool kUseIndices = false;
  constexpr bool kUsePrefetch = false;
  constexpr bool kUseHessian = false;
  ConstructHistogramInner<kUseIndices, kUsePrefetch, kUseHessian>(
      nullptr, start, end, ordered_gradients, nullptr, out);
}
/*!
 * \brief Partition the rows in data_indices by a numerical threshold.
 *
 * Each row index is appended to exactly one of lte_indices / gt_indices.
 * All comparisons are done on 8-bit values: th = threshold + min_bin,
 * t_zero_bin = min_bin + default_bin and t_most_freq_bin =
 * min_bin + most_freq_bin, each decremented by one when most_freq_bin == 0.
 *
 * A stored bin that is outside [min_bin, max_bin] or equals t_most_freq_bin
 * is called a "default" bin below. Two routing schemes exist:
 *  - default bins share the side chosen for missing values;
 *  - one dedicated bin value goes to the missing side, default bins go to
 *    the side chosen by (most_freq_bin <= threshold).
 * Every other bin goes to gt_indices when bin > th, else to lte_indices.
 *
 * \return Number of indices written to lte_indices (0 if num_data <= 0).
 */
data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  data_size_t* data_indices, data_size_t num_data,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
  if (num_data <= 0) {
    return 0;
  }
  const uint8_t minb = static_cast<uint8_t>(min_bin);
  const uint8_t maxb = static_cast<uint8_t>(max_bin);
  uint8_t th = static_cast<uint8_t>(threshold + min_bin);
  uint8_t t_zero_bin = static_cast<uint8_t>(min_bin + default_bin);
  uint8_t t_most_freq_bin = static_cast<uint8_t>(min_bin + most_freq_bin);
  if (most_freq_bin == 0) {
    --th;
    --t_zero_bin;
    --t_most_freq_bin;
  }

  data_size_t lte_count = 0;
  data_size_t gt_count = 0;

  // Side that receives "default" bins in the dedicated-bin scheme.
  const bool default_goes_left = most_freq_bin <= threshold;
  data_size_t* const default_indices =
      default_goes_left ? lte_indices : gt_indices;
  data_size_t* const default_count =
      default_goes_left ? &lte_count : &gt_count;

  // Side that receives the missing-value bin: default_left for NaN and Zero,
  // (default_bin <= threshold) for every other missing type.
  const bool is_nan = missing_type == MissingType::NaN;
  const bool is_zero = missing_type == MissingType::Zero;
  const bool missing_goes_left =
      (is_nan || is_zero) ? default_left : (default_bin <= threshold);
  data_size_t* const missing_default_indices =
      missing_goes_left ? lte_indices : gt_indices;
  data_size_t* const missing_default_count =
      missing_goes_left ? &lte_count : &gt_count;

  const auto bin_at = [this](data_size_t idx) -> uint8_t {
    return (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
  };
  const auto is_default_bin = [&](uint8_t bin) {
    return bin < minb || bin > maxb || bin == t_most_freq_bin;
  };
  const auto route_by_threshold = [&](uint8_t bin, data_size_t idx) {
    if (bin > th) {
      gt_indices[gt_count++] = idx;
    } else {
      lte_indices[lte_count++] = idx;
    }
  };

  // Scheme 1: default bins are sent to the missing side.
  const auto split_default_as_missing = [&]() {
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t idx = data_indices[i];
      const uint8_t bin = bin_at(idx);
      if (is_default_bin(bin)) {
        missing_default_indices[(*missing_default_count)++] = idx;
      } else {
        route_by_threshold(bin, idx);
      }
    }
  };
  // Scheme 2: only `missing_bin` is sent to the missing side.
  const auto split_with_missing_bin = [&](uint8_t missing_bin) {
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t idx = data_indices[i];
      const uint8_t bin = bin_at(idx);
      if (bin == missing_bin) {
        missing_default_indices[(*missing_default_count)++] = idx;
      } else if (is_default_bin(bin)) {
        default_indices[(*default_count)++] = idx;
      } else {
        route_by_threshold(bin, idx);
      }
    }
  };

  if (is_nan) {
    if (t_most_freq_bin == maxb) {
      split_default_as_missing();
    } else {
      split_with_missing_bin(maxb);
    }
  } else {
    if (default_bin == most_freq_bin) {
      split_default_as_missing();
    } else {
      split_with_missing_bin(t_zero_bin);
    }
  }
  return lte_count;
}
/*!
 * \brief Partition the rows in data_indices by a categorical bitset.
 *
 * A stored bin outside [min_bin, max_bin] goes to the side selected by
 * FindInBitset(threshold, num_threahold, most_freq_bin): lte_indices when
 * found, gt_indices otherwise. A bin inside the range goes to lte_indices
 * when (bin - min_bin) is found in the bitset, else to gt_indices.
 *
 * \return Number of indices written to lte_indices (0 if num_data <= 0).
 */
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                             uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threahold,
                             data_size_t* data_indices, data_size_t num_data,
                             data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  if (num_data <= 0) {
    return 0;
  }
  data_size_t lte_count = 0;
  data_size_t gt_count = 0;

  const bool most_freq_in_set =
      Common::FindInBitset(threshold, num_threahold, most_freq_bin);
  data_size_t* const default_indices =
      most_freq_in_set ? lte_indices : gt_indices;
  data_size_t* const default_count =
      most_freq_in_set ? &lte_count : &gt_count;

  for (data_size_t i = 0; i < num_data; ++i) {
    const data_size_t idx = data_indices[i];
    const uint32_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
    const bool in_range = (bin >= min_bin) && (bin <= max_bin);
    if (!in_range) {
      default_indices[(*default_count)++] = idx;
      continue;
    }
    if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
      lte_indices[lte_count++] = idx;
    } else {
      gt_indices[gt_count++] = idx;
    }
  }
  return lte_count;
}
/*! \brief Number of rows recorded in num_data_. */
data_size_t num_data() const override { return num_data_; }
/*!
 * \brief Merge the odd-row buffer filled by Push() into data_.
 *
 * When buf_ is non-empty, ORs its first (num_data_ + 1) / 2 bytes into the
 * same positions of data_ and then clears buf_. An empty buf_ makes this a
 * no-op.
 */
void FinishLoad() override {
  if (buf_.empty()) {
    return;
  }
  const int num_bytes = (num_data_ + 1) / 2;
  for (int pos = 0; pos < num_bytes; ++pos) {
    data_[pos] |= buf_[pos];
  }
  buf_.clear();
}
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
const
uint8_t
*
mem_data
=
reinterpret_cast
<
const
uint8_t
*>
(
memory
);
if
(
!
local_used_indices
.
empty
())
{
const
data_size_t
rest
=
num_data_
&
1
;
for
(
int
i
=
0
;
i
<
num_data_
-
rest
;
i
+=
2
)
{
// get old bins
data_size_t
idx
=
local_used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
((
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
local_used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
((
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
// add
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
local_used_indices
[
num_data_
-
1
];
data_
[
num_data_
>>
1
]
=
(
mem_data
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
else
{
for
(
size_t
i
=
0
;
i
<
data_
.
size
();
++
i
)
{
data_
[
i
]
=
mem_data
[
i
];
}
}
}
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
auto
other_bin
=
dynamic_cast
<
const
Dense4bitsBin
*>
(
full_bin
);
const
data_size_t
rest
=
num_used_indices
&
1
;
for
(
int
i
=
0
;
i
<
num_used_indices
-
rest
;
i
+=
2
)
{
data_size_t
idx
=
used_indices
[
i
];
const
auto
bin1
=
static_cast
<
uint8_t
>
((
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
idx
=
used_indices
[
i
+
1
];
const
auto
bin2
=
static_cast
<
uint8_t
>
((
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
);
const
int
i1
=
i
>>
1
;
data_
[
i1
]
=
(
bin1
|
(
bin2
<<
4
));
}
if
(
rest
)
{
data_size_t
idx
=
used_indices
[
num_used_indices
-
1
];
data_
[
num_used_indices
>>
1
]
=
(
other_bin
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
}
/*! \brief Write the whole packed data_ array to writer in a single call. */
void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
  const size_t num_bytes = sizeof(uint8_t) * data_.size();
  writer->Write(data_.data(), num_bytes);
}
/*! \brief Size in bytes of the packed data_ array. */
size_t SizesInByte() const override {
  const size_t num_elements = data_.size();
  return sizeof(uint8_t) * num_elements;
}
/*!
 * \brief Heap-allocate a copy of this bin through the copy constructor.
 * \return Raw pointer to the new object, created with `new`.
 */
Dense4bitsBin* Clone() override {
  Dense4bitsBin* const copy = new Dense4bitsBin(*this);
  return copy;
}
protected:
/*! \brief Copy constructor: copies num_data_, data_ and buf_ from other. */
Dense4bitsBin(const Dense4bitsBin& other)
    : num_data_(other.num_data_),
      data_(other.data_),
      buf_(other.buf_) {}
data_size_t
num_data_
;
std
::
vector
<
uint8_t
,
Common
::
AlignmentAllocator
<
uint8_t
,
kAlignedSize
>>
data_
;
std
::
vector
<
uint8_t
>
buf_
;
};
/*!
 * \brief Value of row idx relative to this iterator's bin range.
 * \return bin - min_bin_ + offset_ when the stored 4-bit bin lies inside
 *         [min_bin_, max_bin_], otherwise most_freq_bin_.
 */
uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
  const int shift = (idx & 1) << 2;
  const int bin = (bin_data_->data_[idx >> 1] >> shift) & 0xf;
  if (bin < min_bin_ || bin > max_bin_) {
    return most_freq_bin_;
  }
  return bin - min_bin_ + offset_;
}
/*! \brief The 4-bit bin stored for row idx, without any range mapping. */
uint32_t Dense4bitsBinIterator::RawGet(data_size_t idx) {
  const int shift = (idx & 1) << 2;
  return (bin_data_->data_[idx >> 1] >> shift) & 0xf;
}
/*!
 * \brief Create an iterator over this bin for the given bin range.
 * \return Raw pointer to a Dense4bitsBinIterator created with `new`.
 */
inline BinIterator* Dense4bitsBin::GetIterator(
    uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  BinIterator* const iterator =
      new Dense4bitsBinIterator(this, min_bin, max_bin, most_freq_bin);
  return iterator;
}
}
// namespace LightGBM
#endif // LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
src/io/sparse_bin.hpp
View file @
bcad692e
/*!
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
*/
#ifndef LIGHTGBM_IO_SPARSE_BIN_HPP_
#ifndef LIGHTGBM_IO_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_SPARSE_BIN_HPP_
...
@@ -9,27 +10,29 @@
...
@@ -9,27 +10,29 @@
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <limits>
#include <algorithm>
#include <algorithm>
#include <cstdint>
#include <cstdint>
#include <cstring>
#include <cstring>
#include <limits>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
namespace
LightGBM
{
namespace
LightGBM
{
template
<
typename
VAL_T
>
class
SparseBin
;
template
<
typename
VAL_T
>
class
SparseBin
;
const
size_t
kNumFastIndex
=
64
;
const
size_t
kNumFastIndex
=
64
;
template
<
typename
VAL_T
>
template
<
typename
VAL_T
>
class
SparseBinIterator
:
public
BinIterator
{
class
SparseBinIterator
:
public
BinIterator
{
public:
public:
SparseBinIterator
(
const
SparseBin
<
VAL_T
>*
bin_data
,
SparseBinIterator
(
const
SparseBin
<
VAL_T
>*
bin_data
,
uint32_t
min_bin
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
:
bin_data_
(
bin_data
),
min_bin_
(
static_cast
<
VAL_T
>
(
min_bin
)),
:
bin_data_
(
bin_data
),
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
min_bin_
(
static_cast
<
VAL_T
>
(
min_bin
)),
most_freq_bin_
(
static_cast
<
VAL_T
>
(
most_freq_bin
))
{
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
most_freq_bin_
(
static_cast
<
VAL_T
>
(
most_freq_bin
))
{
if
(
most_freq_bin_
==
0
)
{
if
(
most_freq_bin_
==
0
)
{
offset_
=
1
;
offset_
=
1
;
}
else
{
}
else
{
...
@@ -38,7 +41,7 @@ class SparseBinIterator: public BinIterator {
...
@@ -38,7 +41,7 @@ class SparseBinIterator: public BinIterator {
Reset
(
0
);
Reset
(
0
);
}
}
SparseBinIterator
(
const
SparseBin
<
VAL_T
>*
bin_data
,
data_size_t
start_idx
)
SparseBinIterator
(
const
SparseBin
<
VAL_T
>*
bin_data
,
data_size_t
start_idx
)
:
bin_data_
(
bin_data
)
{
:
bin_data_
(
bin_data
)
{
Reset
(
start_idx
);
Reset
(
start_idx
);
}
}
...
@@ -67,22 +70,18 @@ class SparseBinIterator: public BinIterator {
...
@@ -67,22 +70,18 @@ class SparseBinIterator: public BinIterator {
};
};
template
<
typename
VAL_T
>
template
<
typename
VAL_T
>
class
SparseBin
:
public
Bin
{
class
SparseBin
:
public
Bin
{
public:
public:
friend
class
SparseBinIterator
<
VAL_T
>
;
friend
class
SparseBinIterator
<
VAL_T
>
;
explicit
SparseBin
(
data_size_t
num_data
)
explicit
SparseBin
(
data_size_t
num_data
)
:
num_data_
(
num_data
)
{
:
num_data_
(
num_data
)
{
int
num_threads
=
OMP_NUM_THREADS
();
int
num_threads
=
OMP_NUM_THREADS
();
push_buffers_
.
resize
(
num_threads
);
push_buffers_
.
resize
(
num_threads
);
}
}
~
SparseBin
()
{
~
SparseBin
()
{}
}
void
ReSize
(
data_size_t
num_data
)
override
{
void
ReSize
(
data_size_t
num_data
)
override
{
num_data_
=
num_data
;
}
num_data_
=
num_data
;
}
void
Push
(
int
tid
,
data_size_t
idx
,
uint32_t
value
)
override
{
void
Push
(
int
tid
,
data_size_t
idx
,
uint32_t
value
)
override
{
auto
cur_bin
=
static_cast
<
VAL_T
>
(
value
);
auto
cur_bin
=
static_cast
<
VAL_T
>
(
value
);
...
@@ -91,36 +90,49 @@ class SparseBin: public Bin {
...
@@ -91,36 +90,49 @@ class SparseBin: public Bin {
}
}
}
}
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
BinIterator
*
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
override
;
#define ACC_GH(hist, i, g, h) \
#define ACC_GH(hist, i, g, h)
\
const auto ti = static_cast<int>(i) << 1; \
const auto ti = static_cast<int>(i) << 1; \
hist[ti] += g; \
hist[ti] += g;
\
hist[ti + 1] += h;
\
hist[ti + 1] += h;
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
override
{
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
override
{
data_size_t
i_delta
,
cur_pos
;
data_size_t
i_delta
,
cur_pos
;
InitIndex
(
data_indices
[
start
],
&
i_delta
,
&
cur_pos
);
InitIndex
(
data_indices
[
start
],
&
i_delta
,
&
cur_pos
);
data_size_t
i
=
start
;
data_size_t
i
=
start
;
for
(;;)
{
for
(;;)
{
if
(
cur_pos
<
data_indices
[
i
])
{
if
(
cur_pos
<
data_indices
[
i
])
{
cur_pos
+=
deltas_
[
++
i_delta
];
cur_pos
+=
deltas_
[
++
i_delta
];
if
(
i_delta
>=
num_vals_
)
{
break
;
}
if
(
i_delta
>=
num_vals_
)
{
break
;
}
}
else
if
(
cur_pos
>
data_indices
[
i
])
{
}
else
if
(
cur_pos
>
data_indices
[
i
])
{
if
(
++
i
>=
end
)
{
break
;
}
if
(
++
i
>=
end
)
{
break
;
}
}
else
{
}
else
{
const
VAL_T
bin
=
vals_
[
i_delta
];
const
VAL_T
bin
=
vals_
[
i_delta
];
ACC_GH
(
out
,
bin
,
ordered_gradients
[
i
],
ordered_hessians
[
i
]);
ACC_GH
(
out
,
bin
,
ordered_gradients
[
i
],
ordered_hessians
[
i
]);
if
(
++
i
>=
end
)
{
break
;
}
if
(
++
i
>=
end
)
{
break
;
}
cur_pos
+=
deltas_
[
++
i_delta
];
cur_pos
+=
deltas_
[
++
i_delta
];
if
(
i_delta
>=
num_vals_
)
{
break
;
}
if
(
i_delta
>=
num_vals_
)
{
break
;
}
}
}
}
}
}
}
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
override
{
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
hist_t
*
out
)
const
override
{
data_size_t
i_delta
,
cur_pos
;
data_size_t
i_delta
,
cur_pos
;
InitIndex
(
start
,
&
i_delta
,
&
cur_pos
);
InitIndex
(
start
,
&
i_delta
,
&
cur_pos
);
while
(
cur_pos
<
start
&&
i_delta
<
num_vals_
)
{
while
(
cur_pos
<
start
&&
i_delta
<
num_vals_
)
{
...
@@ -133,8 +145,9 @@ class SparseBin: public Bin {
...
@@ -133,8 +145,9 @@ class SparseBin: public Bin {
}
}
}
}
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
start
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
data_size_t
i_delta
,
cur_pos
;
data_size_t
i_delta
,
cur_pos
;
InitIndex
(
data_indices
[
start
],
&
i_delta
,
&
cur_pos
);
InitIndex
(
data_indices
[
start
],
&
i_delta
,
&
cur_pos
);
data_size_t
i
=
start
;
data_size_t
i
=
start
;
...
@@ -143,22 +156,31 @@ class SparseBin: public Bin {
...
@@ -143,22 +156,31 @@ class SparseBin: public Bin {
for
(;;)
{
for
(;;)
{
if
(
cur_pos
<
data_indices
[
i
])
{
if
(
cur_pos
<
data_indices
[
i
])
{
cur_pos
+=
deltas_
[
++
i_delta
];
cur_pos
+=
deltas_
[
++
i_delta
];
if
(
i_delta
>=
num_vals_
)
{
break
;
}
if
(
i_delta
>=
num_vals_
)
{
break
;
}
}
else
if
(
cur_pos
>
data_indices
[
i
])
{
}
else
if
(
cur_pos
>
data_indices
[
i
])
{
if
(
++
i
>=
end
)
{
break
;
}
if
(
++
i
>=
end
)
{
break
;
}
}
else
{
}
else
{
const
uint32_t
ti
=
static_cast
<
uint32_t
>
(
vals_
[
i_delta
])
<<
1
;
const
uint32_t
ti
=
static_cast
<
uint32_t
>
(
vals_
[
i_delta
])
<<
1
;
grad
[
ti
]
+=
ordered_gradients
[
i
];
grad
[
ti
]
+=
ordered_gradients
[
i
];
++
cnt
[
ti
];
++
cnt
[
ti
];
if
(
++
i
>=
end
)
{
break
;
}
if
(
++
i
>=
end
)
{
break
;
}
cur_pos
+=
deltas_
[
++
i_delta
];
cur_pos
+=
deltas_
[
++
i_delta
];
if
(
i_delta
>=
num_vals_
)
{
break
;
}
if
(
i_delta
>=
num_vals_
)
{
break
;
}
}
}
}
}
}
}
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
void
ConstructHistogram
(
data_size_t
start
,
data_size_t
end
,
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
const
score_t
*
ordered_gradients
,
hist_t
*
out
)
const
override
{
data_size_t
i_delta
,
cur_pos
;
data_size_t
i_delta
,
cur_pos
;
InitIndex
(
start
,
&
i_delta
,
&
cur_pos
);
InitIndex
(
start
,
&
i_delta
,
&
cur_pos
);
hist_t
*
grad
=
out
;
hist_t
*
grad
=
out
;
...
@@ -173,17 +195,17 @@ class SparseBin: public Bin {
...
@@ -173,17 +195,17 @@ class SparseBin: public Bin {
cur_pos
+=
deltas_
[
++
i_delta
];
cur_pos
+=
deltas_
[
++
i_delta
];
}
}
}
}
#undef ACC_GH
#undef ACC_GH
inline
void
NextNonzeroFast
(
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
inline
void
NextNonzeroFast
(
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
*
cur_pos
+=
deltas_
[
++
(
*
i_delta
)];
*
cur_pos
+=
deltas_
[
++
(
*
i_delta
)];
if
(
*
i_delta
>=
num_vals_
)
{
if
(
*
i_delta
>=
num_vals_
)
{
*
cur_pos
=
num_data_
;
*
cur_pos
=
num_data_
;
}
}
}
}
inline
bool
NextNonzero
(
data_size_t
*
i_delta
,
inline
bool
NextNonzero
(
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
data_size_t
*
cur_pos
)
const
{
*
cur_pos
+=
deltas_
[
++
(
*
i_delta
)];
*
cur_pos
+=
deltas_
[
++
(
*
i_delta
)];
if
(
*
i_delta
<
num_vals_
)
{
if
(
*
i_delta
<
num_vals_
)
{
return
true
;
return
true
;
...
@@ -193,96 +215,82 @@ class SparseBin: public Bin {
...
@@ -193,96 +215,82 @@ class SparseBin: public Bin {
}
}
}
}
template
<
bool
MISS_IS_ZERO
,
bool
MISS_IS_NA
,
bool
MFB_IS_ZERO
,
data_size_t
Split
(
bool
MFB_IS_NA
,
bool
USE_MIN_BIN
>
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
data_size_t
SplitInner
(
uint32_t
min_bin
,
uint32_t
max_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
*
lte_indices
,
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
data_size_t
*
gt_indices
)
const
{
const
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
auto
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
const
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
auto
t_zero_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
VAL_T
t_zero_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
VAL_T
t_most_freq_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
most_freq_bin
);
if
(
most_freq_bin
==
0
)
{
if
(
most_freq_bin
==
0
)
{
th
-=
1
;
--
th
;
t_zero_bin
-=
1
;
--
t_zero_bin
;
t_most_freq_bin
-=
1
;
}
}
const
auto
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
const
auto
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
data_size_t
lte_count
=
0
;
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
missing_default_indices
=
gt_indices
;
data_size_t
*
missing_default_indices
=
gt_indices
;
data_size_t
*
missing_default_count
=
&
gt_count
;
data_size_t
*
missing_default_count
=
&
gt_count
;
SparseBinIterator
<
VAL_T
>
iterator
(
this
,
data_indices
[
0
]);
if
(
most_freq_bin
<=
threshold
)
{
if
(
most_freq_bin
<=
threshold
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
if
(
missing_type
==
MissingType
::
NaN
)
{
if
(
MISS_IS_ZERO
||
MISS_IS_NA
)
{
if
(
default_left
)
{
if
(
default_left
)
{
missing_default_indices
=
lte_indices
;
missing_default_indices
=
lte_indices
;
missing_default_count
=
&
lte_count
;
missing_default_count
=
&
lte_count
;
}
}
if
(
t_most_freq_bin
==
maxb
)
{
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
SparseBinIterator
<
VAL_T
>
iterator
(
this
,
data_indices
[
0
]);
const
data_size_t
idx
=
data_indices
[
i
];
if
(
min_bin
<
max_bin
)
{
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
if
(
t_most_freq_bin
==
bin
||
bin
<
minb
||
bin
>
maxb
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
iterator
.
InnerRawGet
(
idx
);
if
((
MISS_IS_ZERO
&&
!
MFB_IS_ZERO
&&
bin
==
t_zero_bin
)
||
(
MISS_IS_NA
&&
!
MFB_IS_NA
&&
bin
==
maxb
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
((
USE_MIN_BIN
&&
(
bin
<
minb
||
bin
>
maxb
))
||
(
!
USE_MIN_BIN
&&
bin
==
0
))
{
if
((
MISS_IS_NA
&&
MFB_IS_NA
)
||
(
MISS_IS_ZERO
&&
MFB_IS_ZERO
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
bin
==
maxb
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
else
{
}
else
{
if
((
default_left
&&
missing_type
==
MissingType
::
Zero
)
data_size_t
*
max_bin_indices
=
gt_indices
;
||
(
default_bin
<=
threshold
&&
missing_type
!=
MissingType
::
Zero
))
{
data_size_t
*
max_bin_count
=
&
gt_count
;
missing_default_indices
=
lte_indices
;
if
(
maxb
<=
th
)
{
missing_default_count
=
&
lte_count
;
max_bin_indices
=
lte_indices
;
max_bin_count
=
&
lte_count
;
}
}
if
(
default_bin
==
most_freq_bin
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
iterator
.
InnerRawGet
(
idx
);
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
MISS_IS_ZERO
&&
!
MFB_IS_ZERO
&&
bin
==
t_zero_bin
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
!=
maxb
)
{
if
((
MISS_IS_NA
&&
MFB_IS_NA
)
||
(
MISS_IS_ZERO
&&
MFB_IS_ZERO
))
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lt
e
_indices
[
lt
e
_count
++
]
=
idx
;
defau
lt_indices
[
(
*
defau
lt_count
)
++
]
=
idx
;
}
}
}
}
else
{
}
else
{
if
(
MISS_IS_NA
&&
!
MFB_IS_NA
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
bin
==
t_zero_bin
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
<
minb
||
bin
>
maxb
||
t_most_freq_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte
_indices
[
lte
_count
++
]
=
idx
;
max_bin
_indices
[
(
*
max_bin
_count
)
++
]
=
idx
;
}
}
}
}
}
}
...
@@ -290,26 +298,85 @@ class SparseBin: public Bin {
...
@@ -290,26 +298,85 @@ class SparseBin: public Bin {
return
lte_count
;
return
lte_count
;
}
}
data_size_t
SplitCategorical
(
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
const
uint32_t
*
threshold
,
int
num_threahold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
#define ARGUMENTS \
min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if
(
missing_type
==
MissingType
::
None
)
{
return
SplitInner
<
false
,
false
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
else
if
(
missing_type
==
MissingType
::
Zero
)
{
if
(
default_bin
==
most_freq_bin
)
{
return
SplitInner
<
true
,
false
,
true
,
false
,
true
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
true
,
false
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
}
else
{
if
(
max_bin
==
most_freq_bin
+
min_bin
&&
most_freq_bin
>
0
)
{
return
SplitInner
<
false
,
true
,
false
,
true
,
true
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
false
,
true
,
false
,
false
,
true
>
(
ARGUMENTS
);
}
}
#undef ARGUMENTS
}
data_size_t
Split
(
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
most_freq_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
#define ARGUMENTS \
1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if
(
missing_type
==
MissingType
::
None
)
{
return
SplitInner
<
false
,
false
,
false
,
false
,
false
>
(
ARGUMENTS
);
}
else
if
(
missing_type
==
MissingType
::
Zero
)
{
if
(
default_bin
==
most_freq_bin
)
{
return
SplitInner
<
true
,
false
,
true
,
false
,
false
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
true
,
false
,
false
,
false
,
false
>
(
ARGUMENTS
);
}
}
else
{
if
(
max_bin
==
most_freq_bin
+
1
&&
most_freq_bin
>
0
)
{
return
SplitInner
<
false
,
true
,
false
,
true
,
false
>
(
ARGUMENTS
);
}
else
{
return
SplitInner
<
false
,
true
,
false
,
false
,
false
>
(
ARGUMENTS
);
}
}
#undef ARGUMENTS
}
template
<
bool
USE_MIN_BIN
>
data_size_t
SplitCategoricalInner
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threahold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
{
data_size_t
lte_count
=
0
;
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
SparseBinIterator
<
VAL_T
>
iterator
(
this
,
data_indices
[
0
]);
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
default_count
=
&
gt_count
;
SparseBinIterator
<
VAL_T
>
iterator
(
this
,
data_indices
[
0
]);
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
most_freq_bin
))
{
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
most_freq_bin
))
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
uint32_t
bin
=
iterator
.
InnerRawGet
(
idx
);
const
uint32_t
bin
=
iterator
.
RawGet
(
idx
);
if
(
bin
<
min_bin
||
bin
>
max_bin
)
{
if
(
USE_MIN_BIN
&&
(
bin
<
min_bin
||
bin
>
max_bin
))
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
!
USE_MIN_BIN
&&
bin
==
0
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
bin
-
min_bin
))
{
}
else
if
(
Common
::
FindInBitset
(
threshold
,
num_threahold
,
bin
-
min_bin
))
{
lte_indices
[
lte_count
++
]
=
idx
;
lte_indices
[
lte_count
++
]
=
idx
;
}
else
{
}
else
{
gt_indices
[
gt_count
++
]
=
idx
;
gt_indices
[
gt_count
++
]
=
idx
;
...
@@ -318,6 +385,27 @@ class SparseBin: public Bin {
...
@@ -318,6 +385,27 @@ class SparseBin: public Bin {
return
lte_count
;
return
lte_count
;
}
}
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threahold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
return
SplitCategoricalInner
<
true
>
(
min_bin
,
max_bin
,
most_freq_bin
,
threshold
,
num_threahold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
data_size_t
SplitCategorical
(
uint32_t
max_bin
,
uint32_t
most_freq_bin
,
const
uint32_t
*
threshold
,
int
num_threahold
,
const
data_size_t
*
data_indices
,
data_size_t
cnt
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
return
SplitCategoricalInner
<
false
>
(
1
,
max_bin
,
most_freq_bin
,
threshold
,
num_threahold
,
data_indices
,
cnt
,
lte_indices
,
gt_indices
);
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
void
FinishLoad
()
override
{
void
FinishLoad
()
override
{
...
@@ -326,24 +414,28 @@ class SparseBin: public Bin {
...
@@ -326,24 +414,28 @@ class SparseBin: public Bin {
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
++
i
)
{
pair_cnt
+=
push_buffers_
[
i
].
size
();
pair_cnt
+=
push_buffers_
[
i
].
size
();
}
}
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>&
idx_val_pairs
=
push_buffers_
[
0
];
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>&
idx_val_pairs
=
push_buffers_
[
0
];
idx_val_pairs
.
reserve
(
pair_cnt
);
idx_val_pairs
.
reserve
(
pair_cnt
);
for
(
size_t
i
=
1
;
i
<
push_buffers_
.
size
();
++
i
)
{
for
(
size_t
i
=
1
;
i
<
push_buffers_
.
size
();
++
i
)
{
idx_val_pairs
.
insert
(
idx_val_pairs
.
end
(),
push_buffers_
[
i
].
begin
(),
push_buffers_
[
i
].
end
());
idx_val_pairs
.
insert
(
idx_val_pairs
.
end
(),
push_buffers_
[
i
].
begin
(),
push_buffers_
[
i
].
end
());
push_buffers_
[
i
].
clear
();
push_buffers_
[
i
].
clear
();
push_buffers_
[
i
].
shrink_to_fit
();
push_buffers_
[
i
].
shrink_to_fit
();
}
}
// sort by data index
// sort by data index
std
::
sort
(
idx_val_pairs
.
begin
(),
idx_val_pairs
.
end
(),
std
::
sort
(
idx_val_pairs
.
begin
(),
idx_val_pairs
.
end
(),
[](
const
std
::
pair
<
data_size_t
,
VAL_T
>&
a
,
const
std
::
pair
<
data_size_t
,
VAL_T
>&
b
)
{
[](
const
std
::
pair
<
data_size_t
,
VAL_T
>&
a
,
return
a
.
first
<
b
.
first
;
const
std
::
pair
<
data_size_t
,
VAL_T
>&
b
)
{
});
return
a
.
first
<
b
.
first
;
});
// load delta array
// load delta array
LoadFromPair
(
idx_val_pairs
);
LoadFromPair
(
idx_val_pairs
);
}
}
void
LoadFromPair
(
const
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>&
idx_val_pairs
)
{
void
LoadFromPair
(
const
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>&
idx_val_pairs
)
{
deltas_
.
clear
();
deltas_
.
clear
();
vals_
.
clear
();
vals_
.
clear
();
deltas_
.
reserve
(
idx_val_pairs
.
size
());
deltas_
.
reserve
(
idx_val_pairs
.
size
());
...
@@ -355,7 +447,9 @@ class SparseBin: public Bin {
...
@@ -355,7 +447,9 @@ class SparseBin: public Bin {
const
VAL_T
bin
=
idx_val_pairs
[
i
].
second
;
const
VAL_T
bin
=
idx_val_pairs
[
i
].
second
;
data_size_t
cur_delta
=
cur_idx
-
last_idx
;
data_size_t
cur_delta
=
cur_idx
-
last_idx
;
// disallow the multi-val in one row
// disallow the multi-val in one row
if
(
i
>
0
&&
cur_delta
==
0
)
{
continue
;
}
if
(
i
>
0
&&
cur_delta
==
0
)
{
continue
;
}
while
(
cur_delta
>=
256
)
{
while
(
cur_delta
>=
256
)
{
deltas_
.
push_back
(
255
);
deltas_
.
push_back
(
255
);
vals_
.
push_back
(
0
);
vals_
.
push_back
(
0
);
...
@@ -412,11 +506,13 @@ class SparseBin: public Bin {
...
@@ -412,11 +506,13 @@ class SparseBin: public Bin {
}
}
size_t
SizesInByte
()
const
override
{
size_t
SizesInByte
()
const
override
{
return
sizeof
(
num_vals_
)
+
sizeof
(
uint8_t
)
*
(
num_vals_
+
1
)
return
sizeof
(
num_vals_
)
+
sizeof
(
uint8_t
)
*
(
num_vals_
+
1
)
+
+
sizeof
(
VAL_T
)
*
num_vals_
;
sizeof
(
VAL_T
)
*
num_vals_
;
}
}
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
void
LoadFromMemory
(
const
void
*
memory
,
const
std
::
vector
<
data_size_t
>&
local_used_indices
)
override
{
const
char
*
mem_ptr
=
reinterpret_cast
<
const
char
*>
(
memory
);
const
char
*
mem_ptr
=
reinterpret_cast
<
const
char
*>
(
memory
);
data_size_t
tmp_num_vals
=
*
(
reinterpret_cast
<
const
data_size_t
*>
(
mem_ptr
));
data_size_t
tmp_num_vals
=
*
(
reinterpret_cast
<
const
data_size_t
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
tmp_num_vals
);
mem_ptr
+=
sizeof
(
tmp_num_vals
);
...
@@ -443,7 +539,8 @@ class SparseBin: public Bin {
...
@@ -443,7 +539,8 @@ class SparseBin: public Bin {
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>
tmp_pair
;
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>
tmp_pair
;
data_size_t
cur_pos
=
0
;
data_size_t
cur_pos
=
0
;
data_size_t
j
=
-
1
;
data_size_t
j
=
-
1
;
for
(
data_size_t
i
=
0
;
i
<
static_cast
<
data_size_t
>
(
local_used_indices
.
size
());
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
static_cast
<
data_size_t
>
(
local_used_indices
.
size
());
++
i
)
{
const
data_size_t
idx
=
local_used_indices
[
i
];
const
data_size_t
idx
=
local_used_indices
[
i
];
while
(
cur_pos
<
idx
&&
j
<
num_vals_
)
{
while
(
cur_pos
<
idx
&&
j
<
num_vals_
)
{
NextNonzero
(
&
j
,
&
cur_pos
);
NextNonzero
(
&
j
,
&
cur_pos
);
...
@@ -457,7 +554,8 @@ class SparseBin: public Bin {
...
@@ -457,7 +554,8 @@ class SparseBin: public Bin {
}
}
}
}
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
void
CopySubrow
(
const
Bin
*
full_bin
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
override
{
auto
other_bin
=
dynamic_cast
<
const
SparseBin
<
VAL_T
>*>
(
full_bin
);
auto
other_bin
=
dynamic_cast
<
const
SparseBin
<
VAL_T
>*>
(
full_bin
);
deltas_
.
clear
();
deltas_
.
clear
();
vals_
.
clear
();
vals_
.
clear
();
...
@@ -497,12 +595,16 @@ class SparseBin: public Bin {
...
@@ -497,12 +595,16 @@ class SparseBin: public Bin {
SparseBin
<
VAL_T
>*
Clone
()
override
;
SparseBin
<
VAL_T
>*
Clone
()
override
;
SparseBin
<
VAL_T
>
(
const
SparseBin
<
VAL_T
>&
other
)
SparseBin
<
VAL_T
>
(
const
SparseBin
<
VAL_T
>&
other
)
:
num_data_
(
other
.
num_data_
),
deltas_
(
other
.
deltas_
),
vals_
(
other
.
vals_
),
:
num_data_
(
other
.
num_data_
),
num_vals_
(
other
.
num_vals_
),
push_buffers_
(
other
.
push_buffers_
),
deltas_
(
other
.
deltas_
),
fast_index_
(
other
.
fast_index_
),
fast_index_shift_
(
other
.
fast_index_shift_
)
{
vals_
(
other
.
vals_
),
}
num_vals_
(
other
.
num_vals_
),
push_buffers_
(
other
.
push_buffers_
),
void
InitIndex
(
data_size_t
start_idx
,
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
fast_index_
(
other
.
fast_index_
),
fast_index_shift_
(
other
.
fast_index_shift_
)
{}
void
InitIndex
(
data_size_t
start_idx
,
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
auto
idx
=
start_idx
>>
fast_index_shift_
;
auto
idx
=
start_idx
>>
fast_index_shift_
;
if
(
static_cast
<
size_t
>
(
idx
)
<
fast_index_
.
size
())
{
if
(
static_cast
<
size_t
>
(
idx
)
<
fast_index_
.
size
())
{
const
auto
fast_pair
=
fast_index_
[
start_idx
>>
fast_index_shift_
];
const
auto
fast_pair
=
fast_index_
[
start_idx
>>
fast_index_shift_
];
...
@@ -516,7 +618,8 @@ class SparseBin: public Bin {
...
@@ -516,7 +618,8 @@ class SparseBin: public Bin {
private:
private:
data_size_t
num_data_
;
data_size_t
num_data_
;
std
::
vector
<
uint8_t
,
Common
::
AlignmentAllocator
<
uint8_t
,
kAlignedSize
>>
deltas_
;
std
::
vector
<
uint8_t
,
Common
::
AlignmentAllocator
<
uint8_t
,
kAlignedSize
>>
deltas_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
kAlignedSize
>>
vals_
;
std
::
vector
<
VAL_T
,
Common
::
AlignmentAllocator
<
VAL_T
,
kAlignedSize
>>
vals_
;
data_size_t
num_vals_
;
data_size_t
num_vals_
;
std
::
vector
<
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>>
push_buffers_
;
std
::
vector
<
std
::
vector
<
std
::
pair
<
data_size_t
,
VAL_T
>>>
push_buffers_
;
...
@@ -524,7 +627,7 @@ class SparseBin: public Bin {
...
@@ -524,7 +627,7 @@ class SparseBin: public Bin {
data_size_t
fast_index_shift_
;
data_size_t
fast_index_shift_
;
};
};
template
<
typename
VAL_T
>
template
<
typename
VAL_T
>
SparseBin
<
VAL_T
>*
SparseBin
<
VAL_T
>::
Clone
()
{
SparseBin
<
VAL_T
>*
SparseBin
<
VAL_T
>::
Clone
()
{
return
new
SparseBin
(
*
this
);
return
new
SparseBin
(
*
this
);
}
}
...
@@ -552,9 +655,10 @@ inline void SparseBinIterator<VAL_T>::Reset(data_size_t start_idx) {
...
@@ -552,9 +655,10 @@ inline void SparseBinIterator<VAL_T>::Reset(data_size_t start_idx) {
}
}
template
<
typename
VAL_T
>
template
<
typename
VAL_T
>
BinIterator
*
SparseBin
<
VAL_T
>::
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
{
BinIterator
*
SparseBin
<
VAL_T
>::
GetIterator
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
most_freq_bin
)
const
{
return
new
SparseBinIterator
<
VAL_T
>
(
this
,
min_bin
,
max_bin
,
most_freq_bin
);
return
new
SparseBinIterator
<
VAL_T
>
(
this
,
min_bin
,
max_bin
,
most_freq_bin
);
}
}
}
// namespace LightGBM
}
// namespace LightGBM
#endif
// LightGBM_IO_SPARSE_BIN_HPP_
#endif // LightGBM_IO_SPARSE_BIN_HPP_
src/treelearner/data_partition.hpp
View file @
bcad692e
...
@@ -106,7 +106,7 @@ class DataPartition {
...
@@ -106,7 +106,7 @@ class DataPartition {
const
data_size_t
begin
=
leaf_begin_
[
leaf
];
const
data_size_t
begin
=
leaf_begin_
[
leaf
];
const
data_size_t
cnt
=
leaf_count_
[
leaf
];
const
data_size_t
cnt
=
leaf_count_
[
leaf
];
auto
left_start
=
indices_
.
data
()
+
begin
;
auto
left_start
=
indices_
.
data
()
+
begin
;
auto
left_cnt
=
runner_
.
Run
<
false
>
(
const
auto
left_cnt
=
runner_
.
Run
<
false
>
(
cnt
,
cnt
,
[
=
](
int
,
data_size_t
cur_start
,
data_size_t
cur_cnt
,
data_size_t
*
left
,
[
=
](
int
,
data_size_t
cur_start
,
data_size_t
cur_cnt
,
data_size_t
*
left
,
data_size_t
*
right
)
{
data_size_t
*
right
)
{
...
...
src/treelearner/feature_histogram.hpp
View file @
bcad692e
...
@@ -144,72 +144,67 @@ class FeatureHistogram {
...
@@ -144,72 +144,67 @@ class FeatureHistogram {
template
<
bool
USE_RAND
,
bool
USE_MC
,
bool
USE_L1
,
bool
USE_MAX_OUTPUT
>
template
<
bool
USE_RAND
,
bool
USE_MC
,
bool
USE_L1
,
bool
USE_MAX_OUTPUT
>
void
FuncForNumricalL2
()
{
void
FuncForNumricalL2
()
{
#define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT
#define LAMBDA_ARGUMENTS \
double sum_gradient, double sum_hessian, data_size_t num_data, \
const ConstraintEntry &constraints, SplitInfo *output
#define BEFORE_ARGUMENTS sum_gradient, sum_hessian, output, &rand_threshold
#define FUNC_ARGUMENTS \
sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, output, \
rand_threshold
if
(
meta_
->
num_bin
>
2
&&
meta_
->
missing_type
!=
MissingType
::
None
)
{
if
(
meta_
->
num_bin
>
2
&&
meta_
->
missing_type
!=
MissingType
::
None
)
{
if
(
meta_
->
missing_type
==
MissingType
::
Zero
)
{
if
(
meta_
->
missing_type
==
MissingType
::
Zero
)
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
[
=
](
LAMBDA_ARGUMENTS
)
{
[
=
](
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
int
rand_threshold
=
0
;
const
ConstraintEntry
&
constraints
,
SplitInfo
*
output
)
{
double
min_gain_shift
=
int
rand_threshold
=
0
;
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
double
min_gain_shift
=
BEFORE_ARGUMENTS
);
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
FindBestThresholdSequentially
<
TEMPLATE_PREFIX
,
true
,
true
,
false
>
(
sum_gradient
,
sum_hessian
,
output
,
&
rand_threshold
);
FUNC_ARGUMENTS
);
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
FindBestThresholdSequentially
<
TEMPLATE_PREFIX
,
false
,
true
,
false
>
(
USE_MAX_OUTPUT
,
true
,
true
,
false
>
(
FUNC_ARGUMENTS
);
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
};
min_gain_shift
,
output
,
rand_threshold
);
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
USE_MAX_OUTPUT
,
false
,
true
,
false
>
(
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
min_gain_shift
,
output
,
rand_threshold
);
};
}
else
{
}
else
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
[
=
](
LAMBDA_ARGUMENTS
)
{
[
=
](
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
int
rand_threshold
=
0
;
const
ConstraintEntry
&
constraints
,
SplitInfo
*
output
)
{
double
min_gain_shift
=
int
rand_threshold
=
0
;
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
double
min_gain_shift
=
BEFORE_ARGUMENTS
);
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
FindBestThresholdSequentially
<
TEMPLATE_PREFIX
,
true
,
false
,
true
>
(
sum_gradient
,
sum_hessian
,
output
,
&
rand_threshold
);
FUNC_ARGUMENTS
);
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
FindBestThresholdSequentially
<
TEMPLATE_PREFIX
,
false
,
false
,
true
>
(
USE_MAX_OUTPUT
,
true
,
false
,
true
>
(
FUNC_ARGUMENTS
);
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
};
min_gain_shift
,
output
,
rand_threshold
);
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
USE_MAX_OUTPUT
,
false
,
false
,
true
>
(
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
min_gain_shift
,
output
,
rand_threshold
);
};
}
}
}
else
{
}
else
{
if
(
meta_
->
missing_type
!=
MissingType
::
NaN
)
{
if
(
meta_
->
missing_type
!=
MissingType
::
NaN
)
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
[
=
](
LAMBDA_ARGUMENTS
)
{
[
=
](
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
int
rand_threshold
=
0
;
const
ConstraintEntry
&
constraints
,
SplitInfo
*
output
)
{
double
min_gain_shift
=
int
rand_threshold
=
0
;
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
double
min_gain_shift
=
BEFORE_ARGUMENTS
);
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
FindBestThresholdSequentially
<
TEMPLATE_PREFIX
,
true
,
false
,
false
>
(
sum_gradient
,
sum_hessian
,
output
,
&
rand_threshold
);
FUNC_ARGUMENTS
);
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
};
USE_MAX_OUTPUT
,
true
,
false
,
false
>
(
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
min_gain_shift
,
output
,
rand_threshold
);
};
}
else
{
}
else
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
[
=
](
LAMBDA_ARGUMENTS
)
{
[
=
](
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
int
rand_threshold
=
0
;
const
ConstraintEntry
&
constraints
,
SplitInfo
*
output
)
{
double
min_gain_shift
=
int
rand_threshold
=
0
;
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
double
min_gain_shift
=
BEFORE_ARGUMENTS
);
BeforeNumercal
<
USE_RAND
,
USE_L1
,
USE_MAX_OUTPUT
>
(
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
sum_gradient
,
sum_hessian
,
output
,
&
rand_threshold
);
USE_MAX_OUTPUT
,
true
,
false
,
false
>
(
FindBestThresholdSequentially
<
USE_RAND
,
USE_MC
,
USE_L1
,
FUNC_ARGUMENTS
);
USE_MAX_OUTPUT
,
true
,
false
,
false
>
(
output
->
default_left
=
false
;
sum_gradient
,
sum_hessian
,
num_data
,
constraints
,
};
min_gain_shift
,
output
,
rand_threshold
);
output
->
default_left
=
false
;
};
}
}
}
}
#undef TEMPLATE_PREFIX
#undef LAMBDA_ARGUMENTS
#undef BEFORE_ARGUMENTS
#undef FUNC_ARGURMENTS
}
}
void
FuncForCategorical
()
{
void
FuncForCategorical
()
{
...
@@ -227,41 +222,38 @@ class FeatureHistogram {
...
@@ -227,41 +222,38 @@ class FeatureHistogram {
}
}
}
}
}
}
template
<
bool
USE_RAND
,
bool
USE_MC
>
template
<
bool
USE_RAND
,
bool
USE_MC
>
void
FuncForCategoricalL1
()
{
void
FuncForCategoricalL1
()
{
#define ARGUMENTS \
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, \
std::placeholders::_4, std::placeholders::_5
if
(
meta_
->
config
->
lambda_l1
>
0
)
{
if
(
meta_
->
config
->
lambda_l1
>
0
)
{
if
(
meta_
->
config
->
max_delta_step
>
0
)
{
if
(
meta_
->
config
->
max_delta_step
>
0
)
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
USE_RAND
,
USE_MC
,
true
,
true
>
,
USE_RAND
,
USE_MC
,
true
,
true
>
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
this
,
ARGUMENTS
);
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
,
std
::
placeholders
::
_5
);
}
else
{
}
else
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
USE_RAND
,
USE_MC
,
true
,
false
>
,
USE_RAND
,
USE_MC
,
true
,
false
>
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
this
,
ARGUMENTS
);
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
,
std
::
placeholders
::
_5
);
}
}
}
else
{
}
else
{
if
(
meta_
->
config
->
max_delta_step
>
0
)
{
if
(
meta_
->
config
->
max_delta_step
>
0
)
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
USE_RAND
,
USE_MC
,
false
,
true
>
,
USE_RAND
,
USE_MC
,
false
,
true
>
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
this
,
ARGUMENTS
);
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
,
std
::
placeholders
::
_5
);
}
else
{
}
else
{
find_best_threshold_fun_
=
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategoricalInner
<
USE_RAND
,
USE_MC
,
false
,
false
>
,
USE_RAND
,
USE_MC
,
false
,
false
>
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
this
,
ARGUMENTS
);
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
,
std
::
placeholders
::
_5
);
}
}
}
}
#undef ARGUMENTS
}
}
template
<
bool
USE_RAND
,
bool
USE_MC
,
bool
USE_L1
,
bool
USE_MAX_OUTPUT
>
template
<
bool
USE_RAND
,
bool
USE_MC
,
bool
USE_L1
,
bool
USE_MAX_OUTPUT
>
...
...
src/treelearner/gpu_tree_learner.cpp
View file @
bcad692e
...
@@ -13,7 +13,6 @@
...
@@ -13,7 +13,6 @@
#include <algorithm>
#include <algorithm>
#include "../io/dense_bin.hpp"
#include "../io/dense_bin.hpp"
#include "../io/dense_nbits_bin.hpp"
#define GPU_DEBUG 0
#define GPU_DEBUG 0
...
@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator
*
bin_iters
[
8
];
BinIterator
*
bin_iters
[
8
];
for
(
int
s_idx
=
0
;
s_idx
<
8
;
++
s_idx
)
{
for
(
int
s_idx
=
0
;
s_idx
<
8
;
++
s_idx
)
{
bin_iters
[
s_idx
]
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
bin_iters
[
s_idx
]
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
s_idx
])
==
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
s_idx
])
==
0
)
{
Log
::
Fatal
(
"GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not"
,
dense_ind
[
s_idx
]);
Log
::
Fatal
(
"GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not"
,
dense_ind
[
s_idx
]);
}
}
}
}
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
Dense
4bits
BinIterator
iters
[
8
]
=
{
DenseBinIterator
<
uint8_t
,
true
>
iters
[
8
]
=
{
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
0
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
0
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
1
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
1
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
2
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
2
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
3
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
3
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
4
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
4
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
5
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
5
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
6
]),
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
6
]),
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iters
[
7
])};
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iters
[
7
])};
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
0
]
=
(
uint8_t
)((
iters
[
0
].
RawGet
(
j
)
*
dev_bin_mult
[
0
]
+
((
j
+
0
)
&
(
dev_bin_mult
[
0
]
-
1
)))
host4
[
j
].
s
[
0
]
=
(
uint8_t
)((
iters
[
0
].
RawGet
(
j
)
*
dev_bin_mult
[
0
]
+
((
j
+
0
)
&
(
dev_bin_mult
[
0
]
-
1
)))
|
((
iters
[
1
].
RawGet
(
j
)
*
dev_bin_mult
[
1
]
+
((
j
+
1
)
&
(
dev_bin_mult
[
1
]
-
1
)))
<<
4
));
|
((
iters
[
1
].
RawGet
(
j
)
*
dev_bin_mult
[
1
]
+
((
j
+
1
)
&
(
dev_bin_mult
[
1
]
-
1
)))
<<
4
));
...
@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
for
(
int
s_idx
=
0
;
s_idx
<
4
;
++
s_idx
)
{
for
(
int
s_idx
=
0
;
s_idx
<
4
;
++
s_idx
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_ind
[
s_idx
]);
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
// Dense bin
// Dense bin
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
}
}
else
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
// Dense 4-bit bin
// Dense 4-bit bin
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
host4
[
j
].
s
[
s_idx
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
dev_bin_mult
[
s_idx
]
+
((
j
+
s_idx
)
&
(
dev_bin_mult
[
s_idx
]
-
1
)));
}
}
...
@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
for
(
int
i
=
0
;
i
<
k
;
++
i
)
{
for
(
int
i
=
0
;
i
<
k
;
++
i
)
{
if
(
dword_features_
==
8
)
{
if
(
dword_features_
==
8
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
...
@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
}
else
if
(
dword_features_
==
4
)
{
}
else
if
(
dword_features_
==
4
)
{
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
}
else
if
(
dynamic_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
)
!=
0
)
{
Dense
4bits
BinIterator
iter
=
*
static_cast
<
Dense
4bits
BinIterator
*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>
*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
...
...
windows/LightGBM.vcxproj
View file @
bcad692e
...
@@ -263,7 +263,6 @@
...
@@ -263,7 +263,6 @@
<ClInclude
Include=
"..\src\boosting\rf.hpp"
/>
<ClInclude
Include=
"..\src\boosting\rf.hpp"
/>
<ClInclude
Include=
"..\src\boosting\score_updater.hpp"
/>
<ClInclude
Include=
"..\src\boosting\score_updater.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_sparse_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\multi_val_sparse_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\parser.hpp"
/>
<ClInclude
Include=
"..\src\io\parser.hpp"
/>
...
...
windows/LightGBM.vcxproj.filters
View file @
bcad692e
...
@@ -174,9 +174,6 @@
...
@@ -174,9 +174,6 @@
<ClInclude
Include=
"..\src\boosting\goss.hpp"
>
<ClInclude
Include=
"..\src\boosting\goss.hpp"
>
<Filter>
src\boosting
</Filter>
<Filter>
src\boosting
</Filter>
</ClInclude>
</ClInclude>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
>
<Filter>
src\io
</Filter>
</ClInclude>
<ClInclude
Include=
"..\include\LightGBM\utils\openmp_wrapper.h"
>
<ClInclude
Include=
"..\include\LightGBM\utils\openmp_wrapper.h"
>
<Filter>
include\LightGBM\utils
</Filter>
<Filter>
include\LightGBM\utils
</Filter>
</ClInclude>
</ClInclude>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment