Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
6c4a9750
Commit
6c4a9750
authored
Aug 20, 2017
by
Guolin Ke
Browse files
clean code for the split of bins and leaves.
parent
8fb26b06
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
922 additions
and
889 deletions
+922
-889
include/LightGBM/bin.h
include/LightGBM/bin.h
+451
-435
include/LightGBM/feature_group.h
include/LightGBM/feature_group.h
+8
-3
include/LightGBM/tree.h
include/LightGBM/tree.h
+160
-64
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+2
-2
src/boosting/rf.hpp
src/boosting/rf.hpp
+1
-1
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+52
-43
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+52
-42
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+53
-43
src/io/tree.cpp
src/io/tree.cpp
+104
-234
src/metric/dcg_calculator.cpp
src/metric/dcg_calculator.cpp
+4
-4
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+31
-18
windows/LightGBM.vcxproj
windows/LightGBM.vcxproj
+1
-0
windows/LightGBM.vcxproj.filters
windows/LightGBM.vcxproj.filters
+3
-0
No files found.
include/LightGBM/bin.h
View file @
6c4a9750
This diff is collapsed.
Click to expand it.
include/LightGBM/feature_group.h
View file @
6c4a9750
...
@@ -168,9 +168,14 @@ public:
...
@@ -168,9 +168,14 @@ public:
uint32_t
min_bin
=
bin_offsets_
[
sub_feature
];
uint32_t
min_bin
=
bin_offsets_
[
sub_feature
];
uint32_t
max_bin
=
bin_offsets_
[
sub_feature
+
1
]
-
1
;
uint32_t
max_bin
=
bin_offsets_
[
sub_feature
+
1
]
-
1
;
uint32_t
default_bin
=
bin_mappers_
[
sub_feature
]
->
GetDefaultBin
();
uint32_t
default_bin
=
bin_mappers_
[
sub_feature
]
->
GetDefaultBin
();
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
if
(
bin_mappers_
[
sub_feature
]
->
bin_type
()
==
BinType
::
NumericalBin
)
{
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
missing_type
,
default_left
,
auto
missing_type
=
bin_mappers_
[
sub_feature
]
->
missing_type
();
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
,
bin_mappers_
[
sub_feature
]
->
bin_type
());
return
bin_data_
->
Split
(
min_bin
,
max_bin
,
default_bin
,
missing_type
,
default_left
,
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
}
else
{
return
bin_data_
->
SplitCategorical
(
min_bin
,
max_bin
,
default_bin
,
threshold
,
data_indices
,
num_data
,
lte_indices
,
gt_indices
);
}
}
}
/*!
/*!
* \brief From bin to feature value
* \brief From bin to feature value
...
...
include/LightGBM/tree.h
View file @
6c4a9750
...
@@ -37,9 +37,8 @@ public:
...
@@ -37,9 +37,8 @@ public:
* \brief Performing a split on tree leaves.
* \brief Performing a split on tree leaves.
* \param leaf Index of leaf to be split
* \param leaf Index of leaf to be split
* \param feature Index of feature; the converted index after removing useless features
* \param feature Index of feature; the converted index after removing useless features
* \param bin_type type of this feature, numerical or categorical
* \param threshold Threshold(bin) of split
* \param real_feature Index of feature, the original index on data
* \param real_feature Index of feature, the original index on data
* \param threshold_bin Threshold(bin) of split
* \param threshold_double Threshold on feature value
* \param threshold_double Threshold on feature value
* \param left_value Model Left child output
* \param left_value Model Left child output
* \param right_value Model Right child output
* \param right_value Model Right child output
...
@@ -50,10 +49,29 @@ public:
...
@@ -50,10 +49,29 @@ public:
* \param default_left default direction for missing value
* \param default_left default direction for missing value
* \return The index of new leaf.
* \return The index of new leaf.
*/
*/
int
Split
(
int
leaf
,
int
feature
,
B
in
Type
bin_typ
e
,
uint32_t
threshold
,
int
real_feature
,
int
Split
(
int
leaf
,
int
feature
,
in
t
real_featur
e
,
uint32_t
threshold
_bin
,
double
threshold_double
,
double
left_value
,
double
right_value
,
double
threshold_double
,
double
left_value
,
double
right_value
,
data_size_t
left_cnt
,
data_size_t
right_cnt
,
double
gain
,
MissingType
missing_type
,
bool
default_left
);
data_size_t
left_cnt
,
data_size_t
right_cnt
,
double
gain
,
MissingType
missing_type
,
bool
default_left
);
/*!
* \brief Performing a split on tree leaves, with categorical feature
* \param leaf Index of leaf to be split
* \param feature Index of feature; the converted index after removing useless features
* \param real_feature Index of feature, the original index on data
* \param threshold_bin Threshold(bin) of split, use bitset to represent
* \param num_threshold_bin size of threshold_bin
* \param threshold
* \param left_value Model Left child output
* \param right_value Model Right child output
* \param left_cnt Count of left child
* \param right_cnt Count of right child
* \param gain Split gain
* \return The index of new leaf.
*/
int
SplitCategorical
(
int
leaf
,
int
feature
,
int
real_feature
,
uint32_t
threshold_bin
,
double
threshold
,
double
left_value
,
double
right_value
,
data_size_t
left_cnt
,
data_size_t
right_cnt
,
double
gain
,
MissingType
missing_type
);
/*! \brief Get the output of one leaf */
/*! \brief Get the output of one leaf */
inline
double
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
inline
double
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
...
@@ -89,6 +107,7 @@ public:
...
@@ -89,6 +107,7 @@ public:
* \return Prediction result
* \return Prediction result
*/
*/
inline
double
Predict
(
const
double
*
feature_values
)
const
;
inline
double
Predict
(
const
double
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
double
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
double
*
feature_values
)
const
;
inline
void
PredictContrib
(
const
double
*
feature_values
,
int
num_features
,
double
*
output
)
const
;
inline
void
PredictContrib
(
const
double
*
feature_values
,
int
num_features
,
double
*
output
)
const
;
...
@@ -139,7 +158,7 @@ public:
...
@@ -139,7 +158,7 @@ public:
* \param rate The factor of shrinkage
* \param rate The factor of shrinkage
*/
*/
inline
void
Shrinkage
(
double
rate
)
{
inline
void
Shrinkage
(
double
rate
)
{
#pragma omp parallel for schedule(static,
512
) if (num_leaves_ >=
1024
)
#pragma omp parallel for schedule(static,
1024
) if (num_leaves_ >=
2048
)
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
leaf_value_
[
i
]
*=
rate
;
leaf_value_
[
i
]
*=
rate
;
if
(
leaf_value_
[
i
]
>
kMaxTreeOutput
)
{
leaf_value_
[
i
]
=
kMaxTreeOutput
;
}
if
(
leaf_value_
[
i
]
>
kMaxTreeOutput
)
{
leaf_value_
[
i
]
=
kMaxTreeOutput
;
}
...
@@ -157,24 +176,6 @@ public:
...
@@ -157,24 +176,6 @@ public:
/*! \brief Serialize this object to if-else statement*/
/*! \brief Serialize this object to if-else statement*/
std
::
string
ToIfElse
(
int
index
,
bool
is_predict_leaf_index
);
std
::
string
ToIfElse
(
int
index
,
bool
is_predict_leaf_index
);
template
<
typename
T
>
inline
static
bool
CategoricalDecision
(
T
fval
,
T
threshold
)
{
if
(
static_cast
<
int
>
(
fval
)
==
static_cast
<
int
>
(
threshold
))
{
return
true
;
}
else
{
return
false
;
}
}
template
<
typename
T
>
inline
static
bool
NumericalDecision
(
T
fval
,
T
threshold
)
{
if
(
fval
<=
threshold
)
{
return
true
;
}
else
{
return
false
;
}
}
inline
static
bool
IsZero
(
double
fval
)
{
inline
static
bool
IsZero
(
double
fval
)
{
if
(
fval
>
-
kZeroAsMissingValueRange
&&
fval
<=
kZeroAsMissingValueRange
)
{
if
(
fval
>
-
kZeroAsMissingValueRange
&&
fval
<=
kZeroAsMissingValueRange
)
{
return
true
;
return
true
;
...
@@ -204,21 +205,44 @@ public:
...
@@ -204,21 +205,44 @@ public:
(
*
decision_type
)
|=
(
input
<<
2
);
(
*
decision_type
)
|=
(
input
<<
2
);
}
}
inline
static
uint32_t
ConvertMissingValue
(
uint32_t
fval
,
uint32_t
threshold
,
int8_t
decision_type
,
uint32_t
default_bin
,
uint32_t
max_bin
)
{
private:
uint8_t
missing_type
=
GetMissingType
(
decision_type
);
if
((
missing_type
==
1
&&
fval
==
default_bin
)
inline
std
::
string
NumericalDecisionIfElse
(
int
node
)
{
||
(
missing_type
==
2
&&
fval
==
max_bin
))
{
std
::
stringstream
str_buf
;
if
(
GetDecisionType
(
decision_type
,
kDefaultLeftMask
))
{
uint8_t
missing_type
=
GetMissingType
(
decision_type_
[
node
]);
fval
=
threshold
;
bool
default_left
=
GetDecisionType
(
decision_type_
[
node
],
kDefaultLeftMask
);
if
(
missing_type
==
0
||
(
missing_type
==
1
&&
default_left
&&
kZeroAsMissingValueRange
<
threshold_
[
node
]))
{
str_buf
<<
"if (fval <= "
<<
threshold_
[
node
]
<<
") {"
;
}
else
if
(
missing_type
==
1
)
{
if
(
default_left
)
{
str_buf
<<
"if (fval <= "
<<
threshold_
[
node
]
<<
" || Tree::IsZero(fval)"
<<
" || std::isnan(fval)) {"
;
}
else
{
str_buf
<<
"if (fval <= "
<<
threshold_
[
node
]
<<
" && !Tree::IsZero(fval)"
<<
" && !std::isnan(fval)) {"
;
}
}
else
{
if
(
default_left
)
{
str_buf
<<
"if (fval <= "
<<
threshold_
[
node
]
<<
" || std::isnan(fval)) {"
;
}
else
{
}
else
{
fval
=
threshold
+
1
;
str_buf
<<
"if (
fval
<
=
"
<<
threshold
_
[
node
]
<<
" && !std::isnan(fval)) {"
;
}
}
}
}
return
fval
;
return
str_buf
.
str
()
;
}
}
inline
static
double
ConvertMissingValue
(
double
fval
,
double
threshold
,
int8_t
decision_type
)
{
inline
std
::
string
CategoricalDecisionIfElse
(
int
node
)
const
{
uint8_t
missing_type
=
GetMissingType
(
decision_type
);
uint8_t
missing_type
=
GetMissingType
(
decision_type_
[
node
]);
std
::
stringstream
str_buf
;
if
(
missing_type
==
2
)
{
str_buf
<<
"if (std::isnan(fval)) { int_fval = -1; } else { int_fval = static_cast<int>(fval); }"
;
}
else
{
str_buf
<<
"if (std::isnan(fval)) { int_fval = 0; } else { int_fval = static_cast<int>(fval); }"
;
}
str_buf
<<
"if (int_fval >= 0 && int_fval == "
<<
static_cast
<
int
>
(
threshold_
[
node
])
<<
") {"
;
return
str_buf
.
str
();
}
inline
int
NumericalDecision
(
double
fval
,
int
node
)
const
{
uint8_t
missing_type
=
GetMissingType
(
decision_type_
[
node
]);
if
(
std
::
isnan
(
fval
))
{
if
(
std
::
isnan
(
fval
))
{
if
(
missing_type
!=
2
)
{
if
(
missing_type
!=
2
)
{
fval
=
0.0
f
;
fval
=
0.0
f
;
...
@@ -226,28 +250,79 @@ public:
...
@@ -226,28 +250,79 @@ public:
}
}
if
((
missing_type
==
1
&&
IsZero
(
fval
))
if
((
missing_type
==
1
&&
IsZero
(
fval
))
||
(
missing_type
==
2
&&
std
::
isnan
(
fval
)))
{
||
(
missing_type
==
2
&&
std
::
isnan
(
fval
)))
{
if
(
GetDecisionType
(
decision_type
,
kDefaultLeftMask
))
{
if
(
GetDecisionType
(
decision_type
_
[
node
]
,
kDefaultLeftMask
))
{
fval
=
threshold
;
return
left_child_
[
node
]
;
}
else
{
}
else
{
fval
=
10.0
f
*
threshold
;
return
right_child_
[
node
]
;
}
}
}
}
return
fval
;
if
(
fval
<=
threshold_
[
node
])
{
return
left_child_
[
node
];
}
else
{
return
right_child_
[
node
];
}
}
}
inline
static
const
char
*
GetDecisionTypeName
(
int8_t
type
)
{
inline
int
NumericalDecisionInner
(
uint32_t
fval
,
int
node
,
uint32_t
default_bin
,
uint32_t
max_bin
)
const
{
if
(
type
==
0
)
{
uint8_t
missing_type
=
GetMissingType
(
decision_type_
[
node
]);
return
"no_greater"
;
if
((
missing_type
==
1
&&
fval
==
default_bin
)
||
(
missing_type
==
2
&&
fval
==
max_bin
))
{
if
(
GetDecisionType
(
decision_type_
[
node
],
kDefaultLeftMask
))
{
return
left_child_
[
node
];
}
else
{
return
right_child_
[
node
];
}
}
if
(
fval
<=
threshold_in_bin_
[
node
])
{
return
left_child_
[
node
];
}
else
{
}
else
{
return
"is"
;
return
right_child_
[
node
]
;
}
}
}
}
static
std
::
vector
<
bool
(
*
)(
uint32_t
,
uint32_t
)
>
inner_decision_funs
;
inline
int
CategoricalDecision
(
double
fval
,
int
node
)
const
{
static
std
::
vector
<
bool
(
*
)(
double
,
double
)
>
decision_funs
;
uint8_t
missing_type
=
GetMissingType
(
decision_type_
[
node
]);
int
int_fval
=
static_cast
<
int
>
(
fval
);
if
(
int_fval
<
0
)
{
return
right_child_
[
node
];;
}
else
if
(
std
::
isnan
(
fval
))
{
// NaN is always in the right
if
(
missing_type
==
2
)
{
return
right_child_
[
node
];
}
int_fval
=
0
;
}
if
(
int_fval
==
static_cast
<
int
>
(
threshold_
[
node
]))
{
return
left_child_
[
node
];
}
return
right_child_
[
node
];
}
private:
inline
int
CategoricalDecisionInner
(
uint32_t
fval
,
int
node
)
const
{
if
(
fval
==
threshold_in_bin_
[
node
])
{
return
left_child_
[
node
];
}
return
right_child_
[
node
];
}
inline
int
Decision
(
double
fval
,
int
node
)
const
{
if
(
GetDecisionType
(
decision_type_
[
node
],
kCategoricalMask
))
{
return
CategoricalDecision
(
fval
,
node
);
}
else
{
return
NumericalDecision
(
fval
,
node
);
}
}
inline
int
DecisionInner
(
uint32_t
fval
,
int
node
,
uint32_t
default_bin
,
uint32_t
max_bin
)
const
{
if
(
GetDecisionType
(
decision_type_
[
node
],
kCategoricalMask
))
{
return
CategoricalDecisionInner
(
fval
,
node
);
}
else
{
return
NumericalDecisionInner
(
fval
,
node
,
default_bin
,
max_bin
);
}
}
inline
void
Split
(
int
leaf
,
int
feature
,
int
real_feature
,
double
left_value
,
double
right_value
,
data_size_t
left_cnt
,
data_size_t
right_cnt
,
double
gain
);
/*!
/*!
* \brief Find leaf index of which record belongs by features
* \brief Find leaf index of which record belongs by features
* \param feature_values Feature value of this record
* \param feature_values Feature value of this record
...
@@ -288,6 +363,7 @@ private:
...
@@ -288,6 +363,7 @@ private:
std
::
vector
<
uint32_t
>
threshold_in_bin_
;
std
::
vector
<
uint32_t
>
threshold_in_bin_
;
/*! \brief A non-leaf node's split threshold in feature value */
/*! \brief A non-leaf node's split threshold in feature value */
std
::
vector
<
double
>
threshold_
;
std
::
vector
<
double
>
threshold_
;
int
num_cat_
;
/*! \brief Store the information for categorical feature handle and mising value handle. */
/*! \brief Store the information for categorical feature handle and mising value handle. */
std
::
vector
<
int8_t
>
decision_type_
;
std
::
vector
<
int8_t
>
decision_type_
;
/*! \brief A non-leaf node's split gain */
/*! \brief A non-leaf node's split gain */
...
@@ -306,9 +382,44 @@ private:
...
@@ -306,9 +382,44 @@ private:
/*! \brief Depth for leaves */
/*! \brief Depth for leaves */
std
::
vector
<
int
>
leaf_depth_
;
std
::
vector
<
int
>
leaf_depth_
;
double
shrinkage_
;
double
shrinkage_
;
bool
has_categorical_
;
};
};
inline
void
Tree
::
Split
(
int
leaf
,
int
feature
,
int
real_feature
,
double
left_value
,
double
right_value
,
data_size_t
left_cnt
,
data_size_t
right_cnt
,
double
gain
)
{
int
new_node_idx
=
num_leaves_
-
1
;
// update parent info
int
parent
=
leaf_parent_
[
leaf
];
if
(
parent
>=
0
)
{
// if cur node is left child
if
(
left_child_
[
parent
]
==
~
leaf
)
{
left_child_
[
parent
]
=
new_node_idx
;
}
else
{
right_child_
[
parent
]
=
new_node_idx
;
}
}
// add new node
split_feature_inner_
[
new_node_idx
]
=
feature
;
split_feature_
[
new_node_idx
]
=
real_feature
;
split_gain_
[
new_node_idx
]
=
Common
::
AvoidInf
(
gain
);
// add two new leaves
left_child_
[
new_node_idx
]
=
~
leaf
;
right_child_
[
new_node_idx
]
=
~
num_leaves_
;
// update new leaves
leaf_parent_
[
leaf
]
=
new_node_idx
;
leaf_parent_
[
num_leaves_
]
=
new_node_idx
;
// save current leaf value to internal node before change
internal_value_
[
new_node_idx
]
=
leaf_value_
[
leaf
];
internal_count_
[
new_node_idx
]
=
left_cnt
+
right_cnt
;
leaf_value_
[
leaf
]
=
std
::
isnan
(
left_value
)
?
0.0
f
:
left_value
;
leaf_count_
[
leaf
]
=
left_cnt
;
leaf_value_
[
num_leaves_
]
=
std
::
isnan
(
right_value
)
?
0.0
f
:
right_value
;
leaf_count_
[
num_leaves_
]
=
right_cnt
;
// update leaf depth
leaf_depth_
[
num_leaves_
]
=
leaf_depth_
[
leaf
]
+
1
;
leaf_depth_
[
leaf
]
++
;
}
inline
double
Tree
::
Predict
(
const
double
*
feature_values
)
const
{
inline
double
Tree
::
Predict
(
const
double
*
feature_values
)
const
{
if
(
num_leaves_
>
1
)
{
if
(
num_leaves_
>
1
)
{
int
leaf
=
GetLeaf
(
feature_values
);
int
leaf
=
GetLeaf
(
feature_values
);
...
@@ -409,8 +520,7 @@ inline void Tree::TreeSHAP(const double *feature_values, double *phi,
...
@@ -409,8 +520,7 @@ inline void Tree::TreeSHAP(const double *feature_values, double *phi,
// internal node
// internal node
}
else
{
}
else
{
const
int
hot_index
=
const
int
hot_index
=
Decision
(
feature_values
[
split_index
],
node
);
decision_funs
[
GetDecisionType
(
decision_type_
[
node
],
kCategoricalMask
)](
feature_values
[
split_index
],
threshold_
[
node
]);
const
int
cold_index
=
(
hot_index
==
left_child_
[
node
]
?
right_child_
[
node
]
:
left_child_
[
node
]);
const
int
cold_index
=
(
hot_index
==
left_child_
[
node
]
?
right_child_
[
node
]
:
left_child_
[
node
]);
const
double
w
=
data_count
(
node
);
const
double
w
=
data_count
(
node
);
const
double
hot_zero_fraction
=
data_count
(
hot_index
)
/
w
;
const
double
hot_zero_fraction
=
data_count
(
hot_index
)
/
w
;
...
@@ -469,27 +579,13 @@ inline int Tree::MaxDepth() const {
...
@@ -469,27 +579,13 @@ inline int Tree::MaxDepth() const {
inline
int
Tree
::
GetLeaf
(
const
double
*
feature_values
)
const
{
inline
int
Tree
::
GetLeaf
(
const
double
*
feature_values
)
const
{
int
node
=
0
;
int
node
=
0
;
if
(
has
_cat
egorical_
)
{
if
(
num
_cat
_
>
0
)
{
while
(
node
>=
0
)
{
while
(
node
>=
0
)
{
double
fval
=
ConvertMissingValue
(
feature_values
[
split_feature_
[
node
]],
threshold_
[
node
],
decision_type_
[
node
]);
node
=
Decision
(
feature_values
[
split_feature_
[
node
]],
node
);
if
(
decision_funs
[
GetDecisionType
(
decision_type_
[
node
],
kCategoricalMask
)](
fval
,
threshold_
[
node
]))
{
node
=
left_child_
[
node
];
}
else
{
node
=
right_child_
[
node
];
}
}
}
}
else
{
}
else
{
while
(
node
>=
0
)
{
while
(
node
>=
0
)
{
double
fval
=
ConvertMissingValue
(
feature_values
[
split_feature_
[
node
]],
threshold_
[
node
],
decision_type_
[
node
]);
node
=
NumericalDecision
(
feature_values
[
split_feature_
[
node
]],
node
);
if
(
NumericalDecision
<
double
>
(
fval
,
threshold_
[
node
]))
{
node
=
left_child_
[
node
];
}
else
{
node
=
right_child_
[
node
];
}
}
}
}
}
return
~
node
;
return
~
node
;
...
...
src/boosting/gbdt.cpp
View file @
6c4a9750
...
@@ -473,7 +473,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -473,7 +473,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
auto
label
=
train_data_
->
metadata
().
label
();
auto
label
=
train_data_
->
metadata
().
label
();
double
init_score
=
ObtainAutomaticInitialScore
(
objective_function_
,
label
,
num_data_
);
double
init_score
=
ObtainAutomaticInitialScore
(
objective_function_
,
label
,
num_data_
);
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
new_tree
->
Split
(
0
,
0
,
BinType
::
NumericalBin
,
0
,
0
,
0
,
init_score
,
init_score
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
new_tree
->
Split
(
0
,
0
,
0
,
0
,
0
,
init_score
,
init_score
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
train_score_updater_
->
AddScore
(
init_score
,
0
);
train_score_updater_
->
AddScore
(
init_score
,
0
);
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
score_updater
->
AddScore
(
init_score
,
0
);
score_updater
->
AddScore
(
init_score
,
0
);
...
@@ -553,7 +553,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -553,7 +553,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// only add default score one-time
// only add default score one-time
if
(
!
class_need_train_
[
cur_tree_id
]
&&
models_
.
size
()
<
static_cast
<
size_t
>
(
num_tree_per_iteration_
))
{
if
(
!
class_need_train_
[
cur_tree_id
]
&&
models_
.
size
()
<
static_cast
<
size_t
>
(
num_tree_per_iteration_
))
{
auto
output
=
class_default_output_
[
cur_tree_id
];
auto
output
=
class_default_output_
[
cur_tree_id
];
new_tree
->
Split
(
0
,
0
,
BinType
::
NumericalBin
,
0
,
0
,
0
,
new_tree
->
Split
(
0
,
0
,
0
,
0
,
0
,
output
,
output
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
output
,
output
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
train_score_updater_
->
AddScore
(
output
,
cur_tree_id
);
train_score_updater_
->
AddScore
(
output
,
cur_tree_id
);
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
...
...
src/boosting/rf.hpp
View file @
6c4a9750
...
@@ -127,7 +127,7 @@ public:
...
@@ -127,7 +127,7 @@ public:
if
(
!
class_need_train_
[
cur_tree_id
]
&&
models_
.
size
()
<
static_cast
<
size_t
>
(
num_tree_per_iteration_
))
{
if
(
!
class_need_train_
[
cur_tree_id
]
&&
models_
.
size
()
<
static_cast
<
size_t
>
(
num_tree_per_iteration_
))
{
double
output
=
class_default_output_
[
cur_tree_id
];
double
output
=
class_default_output_
[
cur_tree_id
];
objective_function_
->
ConvertOutput
(
&
output
,
&
output
);
objective_function_
->
ConvertOutput
(
&
output
,
&
output
);
new_tree
->
Split
(
0
,
0
,
BinType
::
NumericalBin
,
0
,
0
,
0
,
new_tree
->
Split
(
0
,
0
,
0
,
0
,
0
,
output
,
output
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
output
,
output
,
0
,
0
,
-
1
,
MissingType
::
None
,
true
);
train_score_updater_
->
AddScore
(
output
,
cur_tree_id
);
train_score_updater_
->
AddScore
(
output
,
cur_tree_id
);
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
for
(
auto
&
score_updater
:
valid_score_updater_
)
{
...
...
src/io/dense_bin.hpp
View file @
6c4a9750
...
@@ -190,11 +190,11 @@ public:
...
@@ -190,11 +190,11 @@ public:
virtual
data_size_t
Split
(
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
,
BinType
bin_type
)
const
override
{
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
if
(
num_data
<=
0
)
{
return
0
;
}
if
(
num_data
<=
0
)
{
return
0
;
}
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
const
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
const
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
VAL_T
t_default_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
VAL_T
t_default_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
if
(
default_bin
==
0
)
{
if
(
default_bin
==
0
)
{
th
-=
1
;
th
-=
1
;
...
@@ -204,59 +204,41 @@ public:
...
@@ -204,59 +204,41 @@ public:
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
b
in_type
==
B
inType
::
N
umericalBin
)
{
if
(
miss
in
g
_type
==
Miss
in
g
Type
::
N
aN
)
{
if
(
missing_type
!=
MissingType
::
Zero
&&
default_bin
<=
threshold
)
{
if
(
default_bin
<=
threshold
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
if
(
default_left
&&
missing_type
==
MissingType
::
Zero
)
{
data_size_t
*
missing_default_indices
=
gt_indices
;
default_indices
=
lte_indices
;
data_size_t
*
missing_default_count
=
&
gt_count
;
default_count
=
&
lte_count
;
if
(
default_left
)
{
}
missing_default_indices
=
lte_indices
;
if
(
missing_type
==
MissingType
::
NaN
)
{
missing_default_count
=
&
lte_count
;
data_size_t
*
missing_default_indices
=
gt_indices
;
}
data_size_t
*
missing_default_count
=
&
gt_count
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
if
(
default_left
)
{
const
data_size_t
idx
=
data_indices
[
i
];
missing_default_indices
=
lte_indices
;
const
VAL_T
bin
=
data_
[
idx
];
missing_default_count
=
&
lte_count
;
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
}
default_indices
[(
*
default_count
)
++
]
=
idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
}
else
if
(
bin
==
maxb
)
{
const
data_size_t
idx
=
data_indices
[
i
];
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
VAL_T
bin
=
data_
[
idx
];
}
else
if
(
bin
>
th
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
gt_indices
[
gt_count
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
{
}
else
if
(
bin
==
maxb
)
{
lte_indices
[
lte_count
++
]
=
idx
;
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
VAL_T
bin
=
data_
[
idx
];
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
}
else
{
}
else
{
if
(
default_
bin
==
threshold
)
{
if
(
(
default_
left
&&
missing_type
==
MissingType
::
Zero
)
||
(
default_bin
<=
threshold
&&
missing_type
!=
MissingType
::
Zero
)
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
VAL_T
bin
=
data_
[
idx
];
const
VAL_T
bin
=
data_
[
idx
];
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
!=
th
)
{
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
lte_indices
[
lte_count
++
]
=
idx
;
...
@@ -266,6 +248,33 @@ public:
...
@@ -266,6 +248,33 @@ public:
return
lte_count
;
return
lte_count
;
}
}
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
threshold
==
default_bin
)
{
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
uint32_t
bin
=
data_
[
idx
];
if
(
bin
<
min_bin
||
bin
>
max_bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
-
min_bin
==
threshold
)
{
lte_indices
[
lte_count
++
]
=
idx
;
}
else
{
gt_indices
[
gt_count
++
]
=
idx
;
}
}
return
lte_count
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
/*! \brief not ordered bin for dense feature */
/*! \brief not ordered bin for dense feature */
...
...
src/io/dense_nbits_bin.hpp
View file @
6c4a9750
...
@@ -229,11 +229,11 @@ public:
...
@@ -229,11 +229,11 @@ public:
virtual
data_size_t
Split
(
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
,
BinType
bin_type
)
const
override
{
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
if
(
num_data
<=
0
)
{
return
0
;
}
if
(
num_data
<=
0
)
{
return
0
;
}
uint8_t
th
=
static_cast
<
uint8_t
>
(
threshold
+
min_bin
);
uint8_t
th
=
static_cast
<
uint8_t
>
(
threshold
+
min_bin
);
uint8_t
minb
=
static_cast
<
uint8_t
>
(
min_bin
);
const
uint8_t
minb
=
static_cast
<
uint8_t
>
(
min_bin
);
uint8_t
maxb
=
static_cast
<
uint8_t
>
(
max_bin
);
const
uint8_t
maxb
=
static_cast
<
uint8_t
>
(
max_bin
);
uint8_t
t_default_bin
=
static_cast
<
uint8_t
>
(
min_bin
+
default_bin
);
uint8_t
t_default_bin
=
static_cast
<
uint8_t
>
(
min_bin
+
default_bin
);
if
(
default_bin
==
0
)
{
if
(
default_bin
==
0
)
{
th
-=
1
;
th
-=
1
;
...
@@ -243,59 +243,41 @@ public:
...
@@ -243,59 +243,41 @@ public:
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
b
in_type
==
B
inType
::
N
umericalBin
)
{
if
(
miss
in
g
_type
==
Miss
in
g
Type
::
N
aN
)
{
if
(
missing_type
!=
MissingType
::
Zero
&&
default_bin
<=
threshold
)
{
if
(
default_bin
<=
threshold
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
if
(
default_left
&&
missing_type
==
MissingType
::
Zero
)
{
data_size_t
*
missing_default_indices
=
gt_indices
;
default_indices
=
lte_indices
;
data_size_t
*
missing_default_count
=
&
gt_count
;
default_count
=
&
lte_count
;
if
(
default_left
)
{
missing_default_indices
=
lte_indices
;
missing_default_count
=
&
lte_count
;
}
}
if
(
missing_type
==
MissingType
::
NaN
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
data_size_t
*
missing_default_indices
=
gt_indices
;
const
data_size_t
idx
=
data_indices
[
i
];
data_size_t
*
missing_default_count
=
&
gt_count
;
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
if
(
default_left
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
missing_default_indices
=
lte_indices
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
missing_default_count
=
&
lte_count
;
}
else
if
(
bin
==
maxb
)
{
}
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
}
else
if
(
bin
>
th
)
{
const
data_size_t
idx
=
data_indices
[
i
];
gt_indices
[
gt_count
++
]
=
idx
;
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
}
else
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
lte_indices
[
lte_count
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
==
maxb
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
}
else
{
}
else
{
if
(
default_
bin
==
threshold
)
{
if
(
(
default_
left
&&
missing_type
==
MissingType
::
Zero
)
||
(
default_bin
<=
threshold
&&
missing_type
!=
MissingType
::
Zero
)
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
uint8_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
!=
th
)
{
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
lte_indices
[
lte_count
++
]
=
idx
;
...
@@ -304,6 +286,34 @@ public:
...
@@ -304,6 +286,34 @@ public:
}
}
return
lte_count
;
return
lte_count
;
}
}
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
default_bin
==
threshold
)
{
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
uint32_t
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
if
(
bin
<
min_bin
||
bin
>
max_bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
-
min_bin
==
threshold
)
{
lte_indices
[
lte_count
++
]
=
idx
;
}
else
{
gt_indices
[
gt_count
++
]
=
idx
;
}
}
return
lte_count
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
data_size_t
num_data
()
const
override
{
return
num_data_
;
}
/*! \brief not ordered bin for dense feature */
/*! \brief not ordered bin for dense feature */
...
...
src/io/sparse_bin.hpp
View file @
6c4a9750
...
@@ -144,12 +144,12 @@ public:
...
@@ -144,12 +144,12 @@ public:
virtual
data_size_t
Split
(
virtual
data_size_t
Split
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
MissingType
missing_type
,
bool
default_left
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
,
BinType
bin_type
)
const
override
{
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
// not need to split
// not need to split
if
(
num_data
<=
0
)
{
return
0
;
}
if
(
num_data
<=
0
)
{
return
0
;
}
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
VAL_T
th
=
static_cast
<
VAL_T
>
(
threshold
+
min_bin
);
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
const
VAL_T
minb
=
static_cast
<
VAL_T
>
(
min_bin
);
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
const
VAL_T
maxb
=
static_cast
<
VAL_T
>
(
max_bin
);
VAL_T
t_default_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
VAL_T
t_default_bin
=
static_cast
<
VAL_T
>
(
min_bin
+
default_bin
);
if
(
default_bin
==
0
)
{
if
(
default_bin
==
0
)
{
th
-=
1
;
th
-=
1
;
...
@@ -160,64 +160,74 @@ public:
...
@@ -160,64 +160,74 @@ public:
data_size_t
gt_count
=
0
;
data_size_t
gt_count
=
0
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
b
in_type
==
B
inType
::
N
umericalBin
)
{
if
(
miss
in
g
_type
==
Miss
in
g
Type
::
N
aN
)
{
if
(
missing_type
!=
MissingType
::
Zero
&&
default_bin
<=
threshold
)
{
if
(
default_bin
<=
threshold
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
if
(
default_left
&&
missing_type
==
MissingType
::
Zero
)
{
data_size_t
*
missing_default_indices
=
gt_indices
;
default_indices
=
lte_indices
;
data_size_t
*
missing_default_count
=
&
gt_count
;
default_count
=
&
lte_count
;
if
(
default_left
)
{
missing_default_indices
=
lte_indices
;
missing_default_count
=
&
lte_count
;
}
}
if
(
missing_type
==
MissingType
::
NaN
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
data_size_t
*
missing_default_indices
=
gt_indices
;
const
data_size_t
idx
=
data_indices
[
i
];
data_size_t
*
missing_default_count
=
&
gt_count
;
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
default_left
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
missing_default_indices
=
lte_indices
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
missing_default_count
=
&
lte_count
;
}
else
if
(
bin
==
maxb
)
{
}
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
}
else
if
(
bin
>
th
)
{
const
data_size_t
idx
=
data_indices
[
i
];
gt_indices
[
gt_count
++
]
=
idx
;
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
}
else
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
lte_indices
[
lte_count
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
==
maxb
)
{
missing_default_indices
[(
*
missing_default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
else
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
}
else
{
}
else
{
if
(
default_
bin
==
threshold
)
{
if
(
(
default_
left
&&
missing_type
==
MissingType
::
Zero
)
||
(
default_bin
<=
threshold
&&
missing_type
!=
MissingType
::
Zero
)
)
{
default_indices
=
lte_indices
;
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
default_count
=
&
lte_count
;
}
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
const
VAL_T
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
if
(
bin
<
minb
||
bin
>
maxb
||
t_default_bin
==
bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
!=
th
)
{
}
else
if
(
bin
>
th
)
{
gt_indices
[
gt_count
++
]
=
idx
;
gt_indices
[
gt_count
++
]
=
idx
;
}
else
{
}
else
{
lte_indices
[
lte_count
++
]
=
idx
;
lte_indices
[
lte_count
++
]
=
idx
;
}
}
}
}
}
return
lte_count
;
}
virtual
data_size_t
SplitCategorical
(
uint32_t
min_bin
,
uint32_t
max_bin
,
uint32_t
default_bin
,
uint32_t
threshold
,
data_size_t
*
data_indices
,
data_size_t
num_data
,
data_size_t
*
lte_indices
,
data_size_t
*
gt_indices
)
const
override
{
if
(
num_data
<=
0
)
{
return
0
;
}
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
SparseBinIterator
<
VAL_T
>
iterator
(
this
,
data_indices
[
0
]);
data_size_t
*
default_indices
=
gt_indices
;
data_size_t
*
default_count
=
&
gt_count
;
if
(
default_bin
==
threshold
)
{
default_indices
=
lte_indices
;
default_count
=
&
lte_count
;
}
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
uint32_t
bin
=
iterator
.
InnerRawGet
(
idx
);
if
(
bin
<
min_bin
||
bin
>
max_bin
)
{
default_indices
[(
*
default_count
)
++
]
=
idx
;
}
else
if
(
bin
-
min_bin
==
threshold
)
{
lte_indices
[
lte_count
++
]
=
idx
;
}
else
{
gt_indices
[
gt_count
++
]
=
idx
;
}
}
}
return
lte_count
;
return
lte_count
;
}
}
...
...
src/io/tree.cpp
View file @
6c4a9750
This diff is collapsed.
Click to expand it.
src/metric/dcg_calculator.cpp
View file @
6c4a9750
...
@@ -84,9 +84,9 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
...
@@ -84,9 +84,9 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
double
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
double
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
const
double
*
score
,
data_size_t
num_data
)
{
const
double
*
score
,
data_size_t
num_data
)
{
// get sorted indices by score
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
data_size_t
>
sorted_idx
(
num_data
)
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
sorted_idx
.
emplace_back
(
i
)
;
sorted_idx
[
i
]
=
i
;
}
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
...
@@ -104,9 +104,9 @@ double DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
...
@@ -104,9 +104,9 @@ double DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
void
DCGCalculator
::
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
void
DCGCalculator
::
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
const
double
*
score
,
data_size_t
num_data
,
std
::
vector
<
double
>*
out
)
{
const
double
*
score
,
data_size_t
num_data
,
std
::
vector
<
double
>*
out
)
{
// get sorted indices by score
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
data_size_t
>
sorted_idx
(
num_data
)
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
sorted_idx
.
emplace_back
(
i
)
;
sorted_idx
[
i
]
=
i
;
}
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
...
...
src/treelearner/serial_tree_learner.cpp
View file @
6c4a9750
...
@@ -516,27 +516,40 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
...
@@ -516,27 +516,40 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
}
}
void
SerialTreeLearner
::
Split
(
Tree
*
tree
,
int
best_
L
eaf
,
int
*
left_leaf
,
int
*
right_leaf
)
{
void
SerialTreeLearner
::
Split
(
Tree
*
tree
,
int
best_
l
eaf
,
int
*
left_leaf
,
int
*
right_leaf
)
{
const
SplitInfo
&
best_split_info
=
best_split_per_leaf_
[
best_
L
eaf
];
const
SplitInfo
&
best_split_info
=
best_split_per_leaf_
[
best_
l
eaf
];
const
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
best_split_info
.
feature
);
const
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
best_split_info
.
feature
);
// left = parent
// left = parent
*
left_leaf
=
best_Leaf
;
*
left_leaf
=
best_leaf
;
// split tree, will return right leaf
if
(
train_data_
->
FeatureBinMapper
(
inner_feature_index
)
->
bin_type
()
==
BinType
::
NumericalBin
)
{
*
right_leaf
=
tree
->
Split
(
best_Leaf
,
// split tree, will return right leaf
inner_feature_index
,
*
right_leaf
=
tree
->
Split
(
best_leaf
,
train_data_
->
FeatureBinMapper
(
inner_feature_index
)
->
bin_type
(),
inner_feature_index
,
best_split_info
.
threshold
,
best_split_info
.
feature
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
train_data_
->
RealThreshold
(
inner_feature_index
,
best_split_info
.
threshold
),
train_data_
->
RealThreshold
(
inner_feature_index
,
best_split_info
.
threshold
),
static_cast
<
double
>
(
best_split_info
.
left_output
),
static_cast
<
double
>
(
best_split_info
.
left_output
),
static_cast
<
double
>
(
best_split_info
.
right_output
),
static_cast
<
double
>
(
best_split_info
.
right_output
),
static_cast
<
data_size_t
>
(
best_split_info
.
left_count
),
static_cast
<
data_size_t
>
(
best_split_info
.
left_count
),
static_cast
<
data_size_t
>
(
best_split_info
.
right_count
),
static_cast
<
data_size_t
>
(
best_split_info
.
right_count
),
static_cast
<
double
>
(
best_split_info
.
gain
),
static_cast
<
double
>
(
best_split_info
.
gain
),
train_data_
->
FeatureBinMapper
(
inner_feature_index
)
->
missing_type
(),
train_data_
->
FeatureBinMapper
(
inner_feature_index
)
->
missing_type
(),
best_split_info
.
default_left
);
best_split_info
.
default_left
);
}
else
{
*
right_leaf
=
tree
->
SplitCategorical
(
best_leaf
,
inner_feature_index
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
train_data_
->
RealThreshold
(
inner_feature_index
,
best_split_info
.
threshold
),
static_cast
<
double
>
(
best_split_info
.
left_output
),
static_cast
<
double
>
(
best_split_info
.
right_output
),
static_cast
<
data_size_t
>
(
best_split_info
.
left_count
),
static_cast
<
data_size_t
>
(
best_split_info
.
right_count
),
static_cast
<
double
>
(
best_split_info
.
gain
),
train_data_
->
FeatureBinMapper
(
inner_feature_index
)
->
missing_type
());
}
// split data partition
// split data partition
data_partition_
->
Split
(
best_
L
eaf
,
train_data_
,
inner_feature_index
,
data_partition_
->
Split
(
best_
l
eaf
,
train_data_
,
inner_feature_index
,
best_split_info
.
threshold
,
best_split_info
.
default_left
,
*
right_leaf
);
best_split_info
.
threshold
,
best_split_info
.
default_left
,
*
right_leaf
);
// init the leaves that used on next iteration
// init the leaves that used on next iteration
...
...
windows/LightGBM.vcxproj
View file @
6c4a9750
...
@@ -218,6 +218,7 @@
...
@@ -218,6 +218,7 @@
<ClInclude
Include=
"..\src\boosting\gbdt.h"
/>
<ClInclude
Include=
"..\src\boosting\gbdt.h"
/>
<ClInclude
Include=
"..\src\boosting\dart.hpp"
/>
<ClInclude
Include=
"..\src\boosting\dart.hpp"
/>
<ClInclude
Include=
"..\src\boosting\goss.hpp"
/>
<ClInclude
Include=
"..\src\boosting\goss.hpp"
/>
<ClInclude
Include=
"..\src\boosting\rf.hpp"
/>
<ClInclude
Include=
"..\src\boosting\score_updater.hpp"
/>
<ClInclude
Include=
"..\src\boosting\score_updater.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
/>
<ClInclude
Include=
"..\src\io\dense_nbits_bin.hpp"
/>
...
...
windows/LightGBM.vcxproj.filters
View file @
6c4a9750
...
@@ -192,6 +192,9 @@
...
@@ -192,6 +192,9 @@
<ClInclude
Include=
"..\include\LightGBM\R_object_helper.h"
>
<ClInclude
Include=
"..\include\LightGBM\R_object_helper.h"
>
<Filter>
include\LightGBM
</Filter>
<Filter>
include\LightGBM
</Filter>
</ClInclude>
</ClInclude>
<ClInclude
Include=
"..\src\boosting\rf.hpp"
>
<Filter>
src\boosting
</Filter>
</ClInclude>
</ItemGroup>
</ItemGroup>
<ItemGroup>
<ItemGroup>
<ClCompile
Include=
"..\src\application\application.cpp"
>
<ClCompile
Include=
"..\src\application\application.cpp"
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment