Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
3b50aeac
Commit
3b50aeac
authored
Nov 02, 2016
by
Guolin Ke
Browse files
merge from master
parents
504d400c
c96ae6af
Changes
30
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
251 additions
and
213 deletions
+251
-213
include/LightGBM/bin.h
include/LightGBM/bin.h
+8
-8
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+4
-4
include/LightGBM/config.h
include/LightGBM/config.h
+15
-15
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+2
-2
include/LightGBM/feature.h
include/LightGBM/feature.h
+2
-2
include/LightGBM/metric.h
include/LightGBM/metric.h
+9
-9
include/LightGBM/objective_function.h
include/LightGBM/objective_function.h
+1
-1
include/LightGBM/tree.h
include/LightGBM/tree.h
+14
-14
include/LightGBM/utils/common.h
include/LightGBM/utils/common.h
+29
-25
src/application/application.cpp
src/application/application.cpp
+2
-2
src/application/predictor.hpp
src/application/predictor.hpp
+19
-19
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+12
-12
src/boosting/gbdt.h
src/boosting/gbdt.h
+6
-6
src/io/bin.cpp
src/io/bin.cpp
+25
-25
src/io/config.cpp
src/io/config.cpp
+11
-11
src/io/dataset.cpp
src/io/dataset.cpp
+52
-18
src/io/metadata.cpp
src/io/metadata.cpp
+4
-4
src/io/parser.hpp
src/io/parser.hpp
+6
-6
src/io/tree.cpp
src/io/tree.cpp
+15
-15
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+15
-15
No files found.
include/LightGBM/bin.h
View file @
3b50aeac
...
...
@@ -56,7 +56,7 @@ public:
/*! \brief True if bin is trivial (contains only one bin) */
inline
bool
is_trival
()
const
{
return
is_trival_
;
}
/*! \brief Sparsity of this bin ( num_zero_bins / num_data ) */
inline
float
sparse_rate
()
const
{
return
sparse_rate_
;
}
inline
double
sparse_rate
()
const
{
return
sparse_rate_
;
}
/*!
* \brief Save binary data to file
* \param file File to write to
...
...
@@ -67,7 +67,7 @@ public:
* \param bin
* \return Feature value of this bin
*/
inline
float
BinToValue
(
unsigned
int
bin
)
const
{
inline
double
BinToValue
(
unsigned
int
bin
)
const
{
return
bin_upper_bound_
[
bin
];
}
/*!
...
...
@@ -79,14 +79,14 @@ public:
* \param value
* \return bin for this feature value
*/
inline
unsigned
int
ValueToBin
(
float
value
)
const
;
inline
unsigned
int
ValueToBin
(
double
value
)
const
;
/*!
* \brief Construct the feature-value-to-bin mapper according to the feature values
* \param values (Sampled) values of this feature
* \param max_bin The maximal number of bin
*/
void
FindBin
(
std
::
vector
<
float
>*
values
,
int
max_bin
);
void
FindBin
(
std
::
vector
<
double
>*
values
,
int
max_bin
);
/*!
* \brief Use specific number of bin to calculate the size of this class
...
...
@@ -111,11 +111,11 @@ private:
/*! \brief Number of bins */
int
num_bin_
;
/*! \brief Store upper bound for each bin */
float
*
bin_upper_bound_
;
double
*
bin_upper_bound_
;
/*! \brief True if this feature is trivial */
bool
is_trival_
;
/*! \brief Sparse rate of this bins( num_bin0/num_data ) */
float
sparse_rate_
;
double
sparse_rate_
;
};
/*!
...
...
@@ -271,7 +271,7 @@ public:
* \return The bin data object
*/
static
Bin
*
CreateBin
(
data_size_t
num_data
,
int
num_bin
,
float
sparse_rate
,
bool
is_enable_sparse
,
bool
*
is_sparse
,
int
default_bin
);
double
sparse_rate
,
bool
is_enable_sparse
,
bool
*
is_sparse
,
int
default_bin
);
/*!
* \brief Create object for bin data of one feature, used for dense feature
...
...
@@ -293,7 +293,7 @@ public:
int
num_bin
,
int
default_bin
);
};
inline
unsigned
int
BinMapper
::
ValueToBin
(
float
value
)
const
{
inline
unsigned
int
BinMapper
::
ValueToBin
(
double
value
)
const
{
// binary search to find bin
int
l
=
0
;
int
r
=
num_bin_
-
1
;
...
...
include/LightGBM/boosting.h
View file @
3b50aeac
...
...
@@ -58,7 +58,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
virtual
float
PredictRaw
(
const
float
*
feature_values
,
virtual
double
PredictRaw
(
const
double
*
feature_values
,
int
num_used_model
)
const
=
0
;
/*!
...
...
@@ -67,7 +67,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
virtual
float
Predict
(
const
float
*
feature_values
,
virtual
double
Predict
(
const
double
*
feature_values
,
int
num_used_model
)
const
=
0
;
/*!
...
...
@@ -77,7 +77,7 @@ public:
* \return Predicted leaf index for this record
*/
virtual
std
::
vector
<
int
>
PredictLeafIndex
(
const
float
*
feature_values
,
const
double
*
feature_values
,
int
num_used_model
)
const
=
0
;
/*!
...
...
@@ -85,7 +85,7 @@ public:
* \param feature_values Feature value on this record
* \return Prediction result, num_class numbers per line
*/
virtual
std
::
vector
<
float
>
PredictMulticlass
(
const
float
*
value
,
int
num_used_model
)
const
=
0
;
virtual
std
::
vector
<
double
>
PredictMulticlass
(
const
double
*
value
,
int
num_used_model
)
const
=
0
;
/*!
* \brief save model to file
...
...
include/LightGBM/config.h
View file @
3b50aeac
...
...
@@ -49,15 +49,15 @@ public:
const
std
::
string
&
name
,
int
*
out
);
/*!
* \brief Get
float
value by specific name of key
* \brief Get
double
value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value will assign to out if key exists
* \return True if key exists
*/
inline
bool
Get
Float
(
inline
bool
Get
Double
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
,
const
std
::
string
&
name
,
float
*
out
);
const
std
::
string
&
name
,
double
*
out
);
/*!
* \brief Get bool value by specific name of key
...
...
@@ -123,9 +123,9 @@ public:
struct
ObjectiveConfig
:
public
ConfigBase
{
public:
virtual
~
ObjectiveConfig
()
{}
float
sigmoid
=
1.0
f
;
double
sigmoid
=
1.0
f
;
// for lambdarank
std
::
vector
<
float
>
label_gain
;
std
::
vector
<
double
>
label_gain
;
// for lambdarank
int
max_position
=
20
;
// for binary
...
...
@@ -140,8 +140,8 @@ struct MetricConfig: public ConfigBase {
public:
virtual
~
MetricConfig
()
{}
int
num_class
=
1
;
float
sigmoid
=
1.0
f
;
std
::
vector
<
float
>
label_gain
;
double
sigmoid
=
1.0
f
;
std
::
vector
<
double
>
label_gain
;
std
::
vector
<
int
>
eval_at
;
void
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
override
;
};
...
...
@@ -151,13 +151,13 @@ public:
struct
TreeConfig
:
public
ConfigBase
{
public:
int
min_data_in_leaf
=
100
;
float
min_sum_hessian_in_leaf
=
10.0
f
;
double
min_sum_hessian_in_leaf
=
10.0
f
;
// should be > 1; only one leaf means there is nothing to learn
int
num_leaves
=
127
;
int
feature_fraction_seed
=
2
;
float
feature_fraction
=
1.0
f
;
double
feature_fraction
=
1.0
f
;
// max cache size (unit: MB) for historical histograms. < 0 means no limit
float
histogram_pool_size
=
-
1.0
f
;
double
histogram_pool_size
=
-
1.0
f
;
// max depth of tree model.
// Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
// And the max leaves will be min(num_leaves, pow(2, max_depth - 1))
...
...
@@ -179,8 +179,8 @@ public:
int
output_freq
=
1
;
bool
is_provide_training_metric
=
false
;
int
num_iterations
=
10
;
float
learning_rate
=
0.1
f
;
float
bagging_fraction
=
1.0
f
;
double
learning_rate
=
0.1
f
;
double
bagging_fraction
=
1.0
f
;
int
bagging_seed
=
3
;
int
bagging_freq
=
0
;
int
early_stopping_round
=
0
;
...
...
@@ -268,12 +268,12 @@ inline bool ConfigBase::GetInt(
return
false
;
}
inline
bool
ConfigBase
::
Get
Float
(
inline
bool
ConfigBase
::
Get
Double
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
,
const
std
::
string
&
name
,
float
*
out
)
{
const
std
::
string
&
name
,
double
*
out
)
{
if
(
params
.
count
(
name
)
>
0
)
{
if
(
!
Common
::
AtofAndCheck
(
params
.
at
(
name
).
c_str
(),
out
))
{
Log
::
Fatal
(
"Parameter %s should be
float
type, passed is [%s]"
,
Log
::
Fatal
(
"Parameter %s should be
double
type, passed is [%s]"
,
name
.
c_str
(),
params
.
at
(
name
).
c_str
());
}
return
true
;
...
...
include/LightGBM/dataset.h
View file @
3b50aeac
...
...
@@ -227,7 +227,7 @@ public:
* \param out_label Label will store to this if exists
*/
virtual
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
=
0
;
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
=
0
;
/*!
* \brief Create a parser object; the format is chosen automatically depending on the file
...
...
@@ -240,7 +240,7 @@ public:
};
using
PredictFunction
=
std
::
function
<
float
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
)
>
;
std
::
function
<
double
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
;
/*! \brief The main class of data set,
* which is used for training or validation
...
...
include/LightGBM/feature.h
View file @
3b50aeac
...
...
@@ -71,7 +71,7 @@ public:
* \param idx Index of record
* \param value feature value of record
*/
inline
void
PushData
(
int
tid
,
data_size_t
line_idx
,
float
value
)
{
inline
void
PushData
(
int
tid
,
data_size_t
line_idx
,
double
value
)
{
unsigned
int
bin
=
bin_mapper_
->
ValueToBin
(
value
);
bin_data_
->
Push
(
tid
,
line_idx
,
bin
);
}
...
...
@@ -89,7 +89,7 @@ public:
* \param bin
* \return Feature value of this bin
*/
inline
float
BinToValue
(
unsigned
int
bin
)
inline
double
BinToValue
(
unsigned
int
bin
)
const
{
return
bin_mapper_
->
BinToValue
(
bin
);
}
/*!
...
...
include/LightGBM/metric.h
View file @
3b50aeac
...
...
@@ -34,7 +34,7 @@ public:
* \brief Calculating and printing metric result
* \param score Current prediction score
*/
virtual
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
=
0
;
virtual
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
=
0
;
/*!
* \brief Create object of metrics
...
...
@@ -54,7 +54,7 @@ public:
* \brief Initial logic
* \param label_gain Gain for labels, default is 2^i - 1
*/
static
void
Init
(
std
::
vector
<
float
>
label_gain
);
static
void
Init
(
std
::
vector
<
double
>
label_gain
);
/*!
* \brief Calculate the DCG score at position k
...
...
@@ -64,7 +64,7 @@ public:
* \param num_data Number of data
* \return The DCG score
*/
static
floa
t
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
static
score_
t
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
);
/*!
...
...
@@ -77,7 +77,7 @@ public:
*/
static
void
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
,
std
::
vector
<
floa
t
>*
out
);
data_size_t
num_data
,
std
::
vector
<
score_
t
>*
out
);
/*!
* \brief Calculate the Max DCG score at position k
...
...
@@ -86,7 +86,7 @@ public:
* \param num_data Number of data
* \return The max DCG score
*/
static
floa
t
CalMaxDCGAtK
(
data_size_t
k
,
static
score_
t
CalMaxDCGAtK
(
data_size_t
k
,
const
float
*
label
,
data_size_t
num_data
);
/*!
...
...
@@ -97,22 +97,22 @@ public:
* \param out Output result
*/
static
void
CalMaxDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
data_size_t
num_data
,
std
::
vector
<
floa
t
>*
out
);
const
float
*
label
,
data_size_t
num_data
,
std
::
vector
<
score_
t
>*
out
);
/*!
* \brief Get discount score of position k
* \param k The position
* \return The discount of this position
*/
inline
static
floa
t
GetDiscount
(
data_size_t
k
)
{
return
discount_
[
k
];
}
inline
static
score_
t
GetDiscount
(
data_size_t
k
)
{
return
discount_
[
k
];
}
private:
/*! \brief True if initialized; avoids initializing multiple times */
static
bool
is_inited_
;
/*! \brief store gains for different label */
static
std
::
vector
<
floa
t
>
label_gain_
;
static
std
::
vector
<
score_
t
>
label_gain_
;
/*! \brief store discount score for different position */
static
std
::
vector
<
floa
t
>
discount_
;
static
std
::
vector
<
score_
t
>
discount_
;
/*! \brief max position for eval */
static
const
data_size_t
kMaxPosition
;
};
...
...
include/LightGBM/objective_function.h
View file @
3b50aeac
...
...
@@ -36,7 +36,7 @@ public:
* This function is used for prediction task, if has sigmoid param, the prediction value will be transform by sigmoid function.
* \return Sigmoid param, if <=0.0 means don't use sigmoid transform on this objective.
*/
virtual
floa
t
GetSigmoid
()
const
=
0
;
virtual
score_
t
GetSigmoid
()
const
=
0
;
/*!
* \brief Create object of objective function
...
...
include/LightGBM/tree.h
View file @
3b50aeac
...
...
@@ -36,18 +36,18 @@ public:
* \param feature Index of feature; the converted index after removing useless features
* \param threshold Threshold(bin) of split
* \param real_feature Index of feature, the original index on data
* \param threshold_
float
Threshold on feature value
* \param threshold_
double
Threshold on feature value
* \param left_value Model Left child output
* \param right_value Model Right child output
* \param gain Split gain
* \return The index of new leaf.
*/
int
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold
,
int
real_feature
,
float
threshold_
float
,
float
left_value
,
float
right_value
,
float
gain
);
double
threshold_
double
,
double
left_value
,
double
right_value
,
double
gain
);
/*! \brief Get the output of one leaf */
inline
float
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
inline
double
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
/*!
* \brief Adding prediction value of this tree model to scores
...
...
@@ -74,8 +74,8 @@ public:
* \param feature_values Feature value of this record
* \return Prediction result
*/
inline
float
Predict
(
const
float
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
float
*
feature_values
)
const
;
inline
double
Predict
(
const
double
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
double
*
feature_values
)
const
;
/*! \brief Get Number of leaves*/
inline
int
num_leaves
()
const
{
return
num_leaves_
;
}
...
...
@@ -91,7 +91,7 @@ public:
* shrinkage rate (a.k.a. learning rate) is used to tune the training process
* \param rate The factor of shrinkage
*/
inline
void
Shrinkage
(
float
rate
)
{
inline
void
Shrinkage
(
double
rate
)
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
leaf_value_
[
i
]
=
leaf_value_
[
i
]
*
rate
;
}
...
...
@@ -119,7 +119,7 @@ private:
* \param feature_values Feature value of this record
* \return Leaf index
*/
inline
int
GetLeaf
(
const
float
*
feature_values
)
const
;
inline
int
GetLeaf
(
const
double
*
feature_values
)
const
;
/*! \brief Number of max leaves*/
int
max_leaves_
;
...
...
@@ -137,25 +137,25 @@ private:
/*! \brief A non-leaf node's split threshold in bin */
unsigned
int
*
threshold_in_bin_
;
/*! \brief A non-leaf node's split threshold in feature value */
float
*
threshold_
;
double
*
threshold_
;
/*! \brief A non-leaf node's split gain */
float
*
split_gain_
;
double
*
split_gain_
;
// used for leaf node
/*! \brief The parent of leaf */
int
*
leaf_parent_
;
/*! \brief Output of leaves */
float
*
leaf_value_
;
double
*
leaf_value_
;
/*! \brief Depth for leaves */
int
*
leaf_depth_
;
};
inline
float
Tree
::
Predict
(
const
float
*
feature_values
)
const
{
inline
double
Tree
::
Predict
(
const
double
*
feature_values
)
const
{
int
leaf
=
GetLeaf
(
feature_values
);
return
LeafOutput
(
leaf
);
}
inline
int
Tree
::
PredictLeafIndex
(
const
float
*
feature_values
)
const
{
inline
int
Tree
::
PredictLeafIndex
(
const
double
*
feature_values
)
const
{
int
leaf
=
GetLeaf
(
feature_values
);
return
leaf
;
}
...
...
@@ -174,7 +174,7 @@ inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
return
~
node
;
}
inline
int
Tree
::
GetLeaf
(
const
float
*
feature_values
)
const
{
inline
int
Tree
::
GetLeaf
(
const
double
*
feature_values
)
const
{
int
node
=
0
;
while
(
node
>=
0
)
{
if
(
feature_values
[
split_feature_real_
[
node
]]
<=
threshold_
[
node
])
{
...
...
include/LightGBM/utils/common.h
View file @
3b50aeac
...
...
@@ -102,10 +102,9 @@ inline static const char* Atoi(const char* p, int* out) {
return
p
;
}
//ref to http://www.leapsecond.com/tools/fast_atof.c
inline
static
const
char
*
Atof
(
const
char
*
p
,
float
*
out
)
{
inline
static
const
char
*
Atof
(
const
char
*
p
,
double
*
out
)
{
int
frac
;
float
sign
,
value
,
scale
;
double
sign
,
value
,
scale
;
*
out
=
0
;
// Skip leading white space, if any.
while
(
*
p
==
' '
)
{
...
...
@@ -113,9 +112,9 @@ inline static const char* Atof(const char* p, float* out) {
}
// Get sign, if any.
sign
=
1.0
f
;
sign
=
1.0
;
if
(
*
p
==
'-'
)
{
sign
=
-
1.0
f
;
sign
=
-
1.0
;
++
p
;
}
else
if
(
*
p
==
'+'
)
{
++
p
;
...
...
@@ -124,24 +123,24 @@ inline static const char* Atof(const char* p, float* out) {
// is a number
if
((
*
p
>=
'0'
&&
*
p
<=
'9'
)
||
*
p
==
'.'
||
*
p
==
'e'
||
*
p
==
'E'
)
{
// Get digits before decimal point or exponent, if any.
for
(
value
=
0.0
f
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
value
=
value
*
10.0
f
+
(
*
p
-
'0'
);
for
(
value
=
0.0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
value
=
value
*
10.0
+
(
*
p
-
'0'
);
}
// Get digits after decimal point, if any.
if
(
*
p
==
'.'
)
{
float
pow10
=
10.0
f
;
double
pow10
=
10.0
;
++
p
;
while
(
*
p
>=
'0'
&&
*
p
<=
'9'
)
{
value
+=
(
*
p
-
'0'
)
/
pow10
;
pow10
*=
10.0
f
;
pow10
*=
10.0
;
++
p
;
}
}
// Handle exponent, if any.
frac
=
0
;
scale
=
1.0
f
;
scale
=
1.0
;
if
((
*
p
==
'e'
)
||
(
*
p
==
'E'
))
{
unsigned
int
expon
;
// Get sign of exponent, if any.
...
...
@@ -156,9 +155,11 @@ inline static const char* Atof(const char* p, float* out) {
for
(
expon
=
0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
expon
=
expon
*
10
+
(
*
p
-
'0'
);
}
if
(
expon
>
38
)
expon
=
38
;
if
(
expon
>
308
)
expon
=
308
;
// Calculate scaling factor.
while
(
expon
>=
50
)
{
scale
*=
1E50
;
expon
-=
50
;
}
while
(
expon
>=
8
)
{
scale
*=
1E8
;
expon
-=
8
;
}
while
(
expon
>
0
)
{
scale
*=
10.0
f
;
expon
-=
1
;
}
while
(
expon
>
0
)
{
scale
*=
10.0
;
expon
-=
1
;
}
}
// Return signed and scaled floating point result.
*
out
=
sign
*
(
frac
?
(
value
/
scale
)
:
(
value
*
scale
));
...
...
@@ -174,9 +175,9 @@ inline static const char* Atof(const char* p, float* out) {
std
::
string
tmp_str
(
p
,
cnt
);
std
::
transform
(
tmp_str
.
begin
(),
tmp_str
.
end
(),
tmp_str
.
begin
(),
::
tolower
);
if
(
tmp_str
==
std
::
string
(
"na"
)
||
tmp_str
==
std
::
string
(
"nan"
))
{
*
out
=
0
.0
f
;
*
out
=
0
;
}
else
if
(
tmp_str
==
std
::
string
(
"inf"
)
||
tmp_str
==
std
::
string
(
"infinity"
))
{
*
out
=
sign
*
static_cast
<
float
>
(
1e38
)
;
*
out
=
sign
*
1e3
0
8
;
}
else
{
Log
::
Fatal
(
"Unknow token %s in data file"
,
tmp_str
.
c_str
());
}
...
...
@@ -191,6 +192,8 @@ inline static const char* Atof(const char* p, float* out) {
return
p
;
}
inline
bool
AtoiAndCheck
(
const
char
*
p
,
int
*
out
)
{
const
char
*
after
=
Atoi
(
p
,
out
);
if
(
*
after
!=
'\0'
)
{
...
...
@@ -199,7 +202,7 @@ inline bool AtoiAndCheck(const char* p, int* out) {
return
true
;
}
inline
bool
AtofAndCheck
(
const
char
*
p
,
float
*
out
)
{
inline
bool
AtofAndCheck
(
const
char
*
p
,
double
*
out
)
{
const
char
*
after
=
Atof
(
p
,
out
);
if
(
*
after
!=
'\0'
)
{
return
false
;
...
...
@@ -260,10 +263,11 @@ inline static void StringToIntArray(const std::string& str, char delimiter, size
}
}
inline
static
void
StringToFloatArray
(
const
std
::
string
&
str
,
char
delimiter
,
size_t
n
,
float
*
out
)
{
inline
static
void
StringToDoubleArray
(
const
std
::
string
&
str
,
char
delimiter
,
size_t
n
,
double
*
out
)
{
std
::
vector
<
std
::
string
>
strs
=
Split
(
str
.
c_str
(),
delimiter
);
if
(
strs
.
size
()
!=
n
)
{
Log
::
Fatal
(
"StringTo
Float
Array error, size doesn't matched."
);
Log
::
Fatal
(
"StringTo
Double
Array error, size doesn't matched."
);
}
for
(
size_t
i
=
0
;
i
<
strs
.
size
();
++
i
)
{
strs
[
i
]
=
Trim
(
strs
[
i
]);
...
...
@@ -271,12 +275,12 @@ inline static void StringToFloatArray(const std::string& str, char delimiter, si
}
}
inline
static
std
::
vector
<
float
>
StringTo
Float
Array
(
const
std
::
string
&
str
,
char
delimiter
)
{
inline
static
std
::
vector
<
double
>
StringTo
Double
Array
(
const
std
::
string
&
str
,
char
delimiter
)
{
std
::
vector
<
std
::
string
>
strs
=
Split
(
str
.
c_str
(),
delimiter
);
std
::
vector
<
float
>
ret
;
std
::
vector
<
double
>
ret
;
for
(
size_t
i
=
0
;
i
<
strs
.
size
();
++
i
)
{
strs
[
i
]
=
Trim
(
strs
[
i
]);
float
val
=
0.0
f
;
double
val
=
0.0
f
;
Atof
(
strs
[
i
].
c_str
(),
&
val
);
ret
.
push_back
(
val
);
}
...
...
@@ -338,19 +342,19 @@ static inline int64_t Pow2RoundUp(int64_t x) {
* \brief Do in-place softmax transformation on p_rec
* \param p_rec The input/output vector of the values.
*/
inline
void
Softmax
(
std
::
vector
<
float
>*
p_rec
)
{
std
::
vector
<
float
>
&
rec
=
*
p_rec
;
float
wmax
=
rec
[
0
];
inline
void
Softmax
(
std
::
vector
<
double
>*
p_rec
)
{
std
::
vector
<
double
>
&
rec
=
*
p_rec
;
double
wmax
=
rec
[
0
];
for
(
size_t
i
=
1
;
i
<
rec
.
size
();
++
i
)
{
wmax
=
std
::
max
(
rec
[
i
],
wmax
);
}
float
wsum
=
0.0
f
;
double
wsum
=
0.0
f
;
for
(
size_t
i
=
0
;
i
<
rec
.
size
();
++
i
)
{
rec
[
i
]
=
std
::
exp
(
rec
[
i
]
-
wmax
);
wsum
+=
rec
[
i
];
}
for
(
size_t
i
=
0
;
i
<
rec
.
size
();
++
i
)
{
rec
[
i
]
/=
static_cast
<
float
>
(
wsum
);
rec
[
i
]
/=
static_cast
<
double
>
(
wsum
);
}
}
...
...
src/application/application.cpp
View file @
3b50aeac
...
...
@@ -125,7 +125,7 @@ void Application::LoadData() {
if
(
boosting_
->
NumberOfSubModels
()
>
0
)
{
predictor
=
new
Predictor
(
boosting_
,
config_
.
io_config
.
is_sigmoid
,
config_
.
predict_leaf_index
,
-
1
);
predict_fun
=
[
&
predictor
](
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
[
&
predictor
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
return
predictor
->
PredictRawOneLine
(
features
);
};
}
...
...
@@ -210,7 +210,7 @@ void Application::InitTrain() {
gbdt_config
->
tree_config
.
feature_fraction_seed
=
GlobalSyncUpByMin
<
int
>
(
gbdt_config
->
tree_config
.
feature_fraction_seed
);
gbdt_config
->
tree_config
.
feature_fraction
=
GlobalSyncUpByMin
<
float
>
(
gbdt_config
->
tree_config
.
feature_fraction
);
GlobalSyncUpByMin
<
double
>
(
gbdt_config
->
tree_config
.
feature_fraction
);
}
}
// create boosting
...
...
src/application/predictor.hpp
View file @
3b50aeac
...
...
@@ -39,9 +39,9 @@ public:
{
num_threads_
=
omp_get_num_threads
();
}
features_
=
new
float
*
[
num_threads_
];
features_
=
new
double
*
[
num_threads_
];
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
features_
[
i
]
=
new
float
[
num_features_
];
features_
[
i
]
=
new
double
[
num_features_
];
}
}
/*!
...
...
@@ -61,7 +61,7 @@ public:
* \param features Feature for this record
* \return Prediction result
*/
float
PredictRawOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
double
PredictRawOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result without sigmoid transformation
return
boosting_
->
PredictRaw
(
features_
[
tid
],
num_used_model_
);
...
...
@@ -72,7 +72,7 @@ public:
* \param features Feature for this record
* \return Predicted leaf index
*/
std
::
vector
<
int
>
PredictLeafIndexOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
std
::
vector
<
int
>
PredictLeafIndexOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result for leaf index
return
boosting_
->
PredictLeafIndex
(
features_
[
tid
],
num_used_model_
);
...
...
@@ -83,7 +83,7 @@ public:
* \param features Feature of this record
* \return Prediction result
*/
float
PredictOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
double
PredictOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result with sigmoid transform if needed
return
boosting_
->
Predict
(
features_
[
tid
],
num_used_model_
);
...
...
@@ -94,7 +94,7 @@ public:
* \param features Feature of this record
* \return Prediction result
*/
std
::
vector
<
float
>
PredictMulticlassOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
std
::
vector
<
double
>
PredictMulticlassOneLine
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
const
int
tid
=
PutFeatureValuesToBuffer
(
features
);
// get result with sigmoid transform if needed
return
boosting_
->
PredictMulticlass
(
features_
[
tid
],
num_used_model_
);
...
...
@@ -125,17 +125,17 @@ public:
}
// function for parse data
std
::
function
<
void
(
const
char
*
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
)
>
parser_fun
;
float
tmp_label
;
std
::
function
<
void
(
const
char
*
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
)
>
parser_fun
;
double
tmp_label
;
parser_fun
=
[
this
,
&
parser
,
&
tmp_label
]
(
const
char
*
buffer
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
feature
)
{
(
const
char
*
buffer
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
feature
)
{
parser
->
ParseOneLine
(
buffer
,
feature
,
&
tmp_label
);
};
std
::
function
<
std
::
string
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
)
>
predict_fun
;
std
::
function
<
std
::
string
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
)
>
predict_fun
;
if
(
num_class_
>
1
)
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
){
std
::
vector
<
float
>
prediction
=
PredictMulticlassOneLine
(
features
);
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
std
::
vector
<
double
>
prediction
=
PredictMulticlassOneLine
(
features
);
std
::
stringstream
result_stream_buf
;
for
(
size_t
i
=
0
;
i
<
prediction
.
size
();
++
i
){
if
(
i
>
0
)
{
...
...
@@ -147,7 +147,7 @@ public:
};
}
else
if
(
is_predict_leaf_index_
)
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
){
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
std
::
vector
<
int
>
predicted_leaf_index
=
PredictLeafIndexOneLine
(
features
);
std
::
stringstream
result_stream_buf
;
for
(
size_t
i
=
0
;
i
<
predicted_leaf_index
.
size
();
++
i
){
...
...
@@ -161,12 +161,12 @@ public:
}
else
{
if
(
is_simgoid_
)
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
){
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
return
std
::
to_string
(
PredictOneLine
(
features
));
};
}
else
{
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
){
predict_fun
=
[
this
](
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
){
return
std
::
to_string
(
PredictRawOneLine
(
features
));
};
}
...
...
@@ -174,7 +174,7 @@ public:
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
[
this
,
&
parser_fun
,
&
predict_fun
,
&
result_file
]
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
lines
)
{
std
::
vector
<
std
::
pair
<
int
,
float
>>
oneline_features
;
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
std
::
vector
<
std
::
string
>
pred_result
(
lines
.
size
(),
""
);
#pragma omp parallel for schedule(static) private(oneline_features)
for
(
data_size_t
i
=
0
;
i
<
static_cast
<
data_size_t
>
(
lines
.
size
());
++
i
)
{
...
...
@@ -197,10 +197,10 @@ public:
}
private:
int
PutFeatureValuesToBuffer
(
const
std
::
vector
<
std
::
pair
<
int
,
float
>>&
features
)
{
int
PutFeatureValuesToBuffer
(
const
std
::
vector
<
std
::
pair
<
int
,
double
>>&
features
)
{
int
tid
=
omp_get_thread_num
();
// init feature value
std
::
memset
(
features_
[
tid
],
0
,
sizeof
(
float
)
*
num_features_
);
std
::
memset
(
features_
[
tid
],
0
,
sizeof
(
double
)
*
num_features_
);
// put feature value
for
(
const
auto
&
p
:
features
)
{
if
(
p
.
first
<
num_features_
)
{
...
...
@@ -212,7 +212,7 @@ private:
/*! \brief Boosting model */
const
Boosting
*
boosting_
;
/*! \brief Buffer for feature values */
float
**
features_
;
double
**
features_
;
/*! \brief Number of features */
int
num_features_
;
/*! \brief Number of classes */
...
...
src/boosting/gbdt.cpp
View file @
3b50aeac
...
...
@@ -229,7 +229,7 @@ bool GBDT::OutputMetric(int iter) {
for
(
auto
&
sub_metric
:
training_metrics_
)
{
auto
name
=
sub_metric
->
GetName
();
auto
scores
=
sub_metric
->
Eval
(
train_score_updater_
->
score
());
Log
::
Info
(
"Iteration:%d, %s : %s"
,
iter
,
name
,
Common
::
ArrayToString
<
float
>
(
scores
,
' '
).
c_str
());
Log
::
Info
(
"Iteration:%d, %s : %s"
,
iter
,
name
,
Common
::
ArrayToString
<
double
>
(
scores
,
' '
).
c_str
());
}
}
// print validation metric
...
...
@@ -239,7 +239,7 @@ bool GBDT::OutputMetric(int iter) {
auto
test_scores
=
valid_metrics_
[
i
][
j
]
->
Eval
(
valid_score_updater_
[
i
]
->
score
());
if
((
iter
%
gbdt_config_
->
output_freq
)
==
0
)
{
auto
name
=
valid_metrics_
[
i
][
j
]
->
GetName
();
Log
::
Info
(
"Iteration:%d, %s : %s"
,
iter
,
name
,
Common
::
ArrayToString
<
float
>
(
test_scores
,
' '
).
c_str
());
Log
::
Info
(
"Iteration:%d, %s : %s"
,
iter
,
name
,
Common
::
ArrayToString
<
double
>
(
test_scores
,
' '
).
c_str
());
}
if
(
!
ret
&&
early_stopping_round_
>
0
)
{
bool
the_bigger_the_better
=
valid_metrics_
[
i
][
j
]
->
is_bigger_better
();
...
...
@@ -266,7 +266,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
auto
name
=
sub_metric
->
GetName
();
auto
scores
=
sub_metric
->
Eval
(
train_score_updater_
->
score
());
std
::
stringstream
str_buf
;
str_buf
<<
name
<<
" : "
<<
Common
::
ArrayToString
<
float
>
(
scores
,
' '
);
str_buf
<<
name
<<
" : "
<<
Common
::
ArrayToString
<
double
>
(
scores
,
' '
);
ret
.
emplace_back
(
str_buf
.
str
());
}
}
...
...
@@ -276,7 +276,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
auto
name
=
valid_metrics_
[
i
][
j
]
->
GetName
();
auto
test_scores
=
valid_metrics_
[
i
][
j
]
->
Eval
(
valid_score_updater_
[
i
]
->
score
());
std
::
stringstream
str_buf
;
str_buf
<<
name
<<
" : "
<<
Common
::
ArrayToString
<
float
>
(
test_scores
,
' '
);
str_buf
<<
name
<<
" : "
<<
Common
::
ArrayToString
<
double
>
(
test_scores
,
' '
);
ret
.
emplace_back
(
str_buf
.
str
());
}
}
...
...
@@ -420,7 +420,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
}
// if sigmoid doesn't exist
if
(
i
==
lines
.
size
())
{
sigmoid_
=
-
1.0
;
sigmoid_
=
-
1.0
f
;
}
// get tree models
i
=
0
;
...
...
@@ -467,22 +467,22 @@ std::string GBDT::FeatureImportance() const {
return
str_buf
.
str
();
}
float
GBDT
::
PredictRaw
(
const
float
*
value
,
int
num_used_model
)
const
{
double
GBDT
::
PredictRaw
(
const
double
*
value
,
int
num_used_model
)
const
{
if
(
num_used_model
<
0
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
float
ret
=
0.0
f
;
double
ret
=
0.0
f
;
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
ret
+=
models_
[
i
]
->
Predict
(
value
);
}
return
ret
;
}
float
GBDT
::
Predict
(
const
float
*
value
,
int
num_used_model
)
const
{
double
GBDT
::
Predict
(
const
double
*
value
,
int
num_used_model
)
const
{
if
(
num_used_model
<
0
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
float
ret
=
0.0
f
;
double
ret
=
0.0
f
;
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
ret
+=
models_
[
i
]
->
Predict
(
value
);
}
...
...
@@ -493,11 +493,11 @@ float GBDT::Predict(const float* value, int num_used_model) const {
return
ret
;
}
std
::
vector
<
float
>
GBDT
::
PredictMulticlass
(
const
float
*
value
,
int
num_used_model
)
const
{
std
::
vector
<
double
>
GBDT
::
PredictMulticlass
(
const
double
*
value
,
int
num_used_model
)
const
{
if
(
num_used_model
<
0
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
;
}
std
::
vector
<
float
>
ret
(
num_class_
,
0.0
f
);
std
::
vector
<
double
>
ret
(
num_class_
,
0.0
f
);
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
for
(
int
j
=
0
;
j
<
num_class_
;
++
j
){
ret
[
j
]
+=
models_
[
i
*
num_class_
+
j
]
->
Predict
(
value
);
...
...
@@ -507,7 +507,7 @@ std::vector<float> GBDT::PredictMulticlass(const float* value, int num_used_mode
return
ret
;
}
std
::
vector
<
int
>
GBDT
::
PredictLeafIndex
(
const
float
*
value
,
int
num_used_model
)
const
{
std
::
vector
<
int
>
GBDT
::
PredictLeafIndex
(
const
double
*
value
,
int
num_used_model
)
const
{
if
(
num_used_model
<
0
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
}
...
...
src/boosting/gbdt.h
View file @
3b50aeac
...
...
@@ -58,7 +58,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
float
PredictRaw
(
const
float
*
feature_values
,
int
num_used_model
)
const
override
;
double
PredictRaw
(
const
double
*
feature_values
,
int
num_used_model
)
const
override
;
/*!
* \brief Prediction for one record with sigmoid transformation if enabled
...
...
@@ -66,14 +66,14 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
float
Predict
(
const
float
*
feature_values
,
int
num_used_model
)
const
override
;
double
Predict
(
const
double
*
feature_values
,
int
num_used_model
)
const
override
;
/*!
* \brief Prediction for multiclass classification
* \param feature_values Feature value on this record
* \return Prediction result, num_class numbers per line
*/
std
::
vector
<
float
>
PredictMulticlass
(
const
float
*
value
,
int
num_used_model
)
const
override
;
std
::
vector
<
double
>
PredictMulticlass
(
const
double
*
value
,
int
num_used_model
)
const
override
;
/*!
* \brief Prediction for one record with leaf index
...
...
@@ -81,7 +81,7 @@ public:
* \param num_used_model Number of used model
* \return Predicted leaf index for this record
*/
std
::
vector
<
int
>
PredictLeafIndex
(
const
float
*
value
,
int
num_used_model
)
const
override
;
std
::
vector
<
int
>
PredictLeafIndex
(
const
double
*
value
,
int
num_used_model
)
const
override
;
/*!
* \brief Serialize models by string
...
...
@@ -177,7 +177,7 @@ private:
int
early_stopping_round_
;
/*! \brief Best score(s) for early stopping */
std
::
vector
<
std
::
vector
<
int
>>
best_iter_
;
std
::
vector
<
std
::
vector
<
score_t
>>
best_score_
;
std
::
vector
<
std
::
vector
<
double
>>
best_score_
;
/*! \brief Trained models(trees) */
std
::
vector
<
Tree
*>
models_
;
/*! \brief Max feature index of training data*/
...
...
@@ -204,7 +204,7 @@ private:
* \brief Sigmoid parameter, used for prediction.
* if > 0, the output score will be transformed by the sigmoid function
*/
float
sigmoid_
;
double
sigmoid_
;
/*! \brief Index of label column */
data_size_t
label_idx_
;
/*! \brief Saved number of models */
...
...
src/io/bin.cpp
View file @
3b50aeac
...
...
@@ -24,7 +24,7 @@ BinMapper::BinMapper(const BinMapper& other)
num_bin_
=
other
.
num_bin_
;
is_trival_
=
other
.
is_trival_
;
sparse_rate_
=
other
.
sparse_rate_
;
bin_upper_bound_
=
new
float
[
num_bin_
];
bin_upper_bound_
=
new
double
[
num_bin_
];
for
(
int
i
=
0
;
i
<
num_bin_
;
++
i
)
{
bin_upper_bound_
[
i
]
=
other
.
bin_upper_bound_
[
i
];
}
...
...
@@ -39,11 +39,11 @@ BinMapper::~BinMapper() {
delete
[]
bin_upper_bound_
;
}
void
BinMapper
::
FindBin
(
std
::
vector
<
float
>*
values
,
int
max_bin
)
{
std
::
vector
<
float
>&
ref_values
=
(
*
values
);
void
BinMapper
::
FindBin
(
std
::
vector
<
double
>*
values
,
int
max_bin
)
{
std
::
vector
<
double
>&
ref_values
=
(
*
values
);
size_t
sample_size
=
values
->
size
();
// find distinct_values first
std
::
vector
<
float
>
distinct_values
;
std
::
vector
<
double
>
distinct_values
;
std
::
vector
<
int
>
counts
;
std
::
sort
(
ref_values
.
begin
(),
ref_values
.
end
());
...
...
@@ -63,21 +63,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
if
(
num_values
<=
max_bin
)
{
// use distinct value is enough
num_bin_
=
num_values
;
bin_upper_bound_
=
new
float
[
num_values
];
bin_upper_bound_
=
new
double
[
num_values
];
for
(
int
i
=
0
;
i
<
num_values
-
1
;
++
i
)
{
bin_upper_bound_
[
i
]
=
(
distinct_values
[
i
]
+
distinct_values
[
i
+
1
])
/
2
;
}
cnt_in_bin0
=
counts
[
0
];
bin_upper_bound_
[
num_values
-
1
]
=
std
::
numeric_limits
<
float
>::
infinity
();
bin_upper_bound_
[
num_values
-
1
]
=
std
::
numeric_limits
<
double
>::
infinity
();
}
else
{
// mean size for one bin
float
mean_bin_size
=
sample_size
/
static_cast
<
float
>
(
max_bin
);
double
mean_bin_size
=
sample_size
/
static_cast
<
double
>
(
max_bin
);
int
rest_sample_cnt
=
static_cast
<
int
>
(
sample_size
);
int
bin_cnt
=
0
;
num_bin_
=
max_bin
;
std
::
vector
<
float
>
upper_bounds
(
max_bin
,
std
::
numeric_limits
<
float
>::
infinity
());
std
::
vector
<
float
>
lower_bounds
(
max_bin
,
std
::
numeric_limits
<
float
>::
infinity
());
std
::
vector
<
double
>
upper_bounds
(
max_bin
,
std
::
numeric_limits
<
double
>::
infinity
());
std
::
vector
<
double
>
lower_bounds
(
max_bin
,
std
::
numeric_limits
<
double
>::
infinity
());
// sort by count, descending
Common
::
SortForPair
(
counts
,
distinct_values
,
0
,
true
);
// fetch big slot as unique bin
...
...
@@ -90,8 +90,8 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
// process remainder bins
if
(
bin_cnt
<
max_bin
)
{
// sort rest by values
Common
::
SortForPair
<
float
,
int
>
(
distinct_values
,
counts
,
bin_cnt
,
false
);
mean_bin_size
=
rest_sample_cnt
/
static_cast
<
float
>
(
max_bin
-
bin_cnt
);
Common
::
SortForPair
<
double
,
int
>
(
distinct_values
,
counts
,
bin_cnt
,
false
);
mean_bin_size
=
rest_sample_cnt
/
static_cast
<
double
>
(
max_bin
-
bin_cnt
);
lower_bounds
[
bin_cnt
]
=
distinct_values
[
bin_cnt
];
int
cur_cnt_inbin
=
0
;
for
(
int
i
=
bin_cnt
;
i
<
num_values
-
1
;
++
i
)
{
...
...
@@ -105,21 +105,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
lower_bounds
[
bin_cnt
]
=
distinct_values
[
i
+
1
];
if
(
bin_cnt
>=
max_bin
-
1
)
break
;
cur_cnt_inbin
=
0
;
mean_bin_size
=
rest_sample_cnt
/
static_cast
<
float
>
(
max_bin
-
bin_cnt
);
mean_bin_size
=
rest_sample_cnt
/
static_cast
<
double
>
(
max_bin
-
bin_cnt
);
}
}
cur_cnt_inbin
+=
counts
[
num_values
-
1
];
}
Common
::
SortForPair
<
float
,
float
>
(
lower_bounds
,
upper_bounds
,
0
,
false
);
Common
::
SortForPair
<
double
,
double
>
(
lower_bounds
,
upper_bounds
,
0
,
false
);
// update bin upper bound
bin_upper_bound_
=
new
float
[
bin_cnt
];
bin_upper_bound_
=
new
double
[
bin_cnt
];
num_bin_
=
bin_cnt
;
for
(
int
i
=
0
;
i
<
bin_cnt
-
1
;
++
i
)
{
bin_upper_bound_
[
i
]
=
(
upper_bounds
[
i
]
+
lower_bounds
[
i
+
1
])
/
2.0
f
;
}
// last bin upper bound
bin_upper_bound_
[
bin_cnt
-
1
]
=
std
::
numeric_limits
<
float
>::
infinity
();
bin_upper_bound_
[
bin_cnt
-
1
]
=
std
::
numeric_limits
<
double
>::
infinity
();
}
// check trivial (num_bin_ == 1) feature
if
(
num_bin_
<=
1
)
{
...
...
@@ -128,7 +128,7 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
is_trival_
=
false
;
}
// calculate sparse rate
sparse_rate_
=
static_cast
<
float
>
(
cnt_in_bin0
)
/
static_cast
<
float
>
(
sample_size
);
sparse_rate_
=
static_cast
<
double
>
(
cnt_in_bin0
)
/
static_cast
<
double
>
(
sample_size
);
}
...
...
@@ -136,8 +136,8 @@ int BinMapper::SizeForSpecificBin(int bin) {
int
size
=
0
;
size
+=
sizeof
(
int
);
size
+=
sizeof
(
bool
);
size
+=
sizeof
(
float
);
size
+=
bin
*
sizeof
(
float
);
size
+=
sizeof
(
double
);
size
+=
bin
*
sizeof
(
double
);
return
size
;
}
...
...
@@ -148,7 +148,7 @@ void BinMapper::CopyTo(char * buffer) {
buffer
+=
sizeof
(
is_trival_
);
std
::
memcpy
(
buffer
,
&
sparse_rate_
,
sizeof
(
sparse_rate_
));
buffer
+=
sizeof
(
sparse_rate_
);
std
::
memcpy
(
buffer
,
bin_upper_bound_
,
num_bin_
*
sizeof
(
float
));
std
::
memcpy
(
buffer
,
bin_upper_bound_
,
num_bin_
*
sizeof
(
double
));
}
void
BinMapper
::
CopyFrom
(
const
char
*
buffer
)
{
...
...
@@ -159,19 +159,19 @@ void BinMapper::CopyFrom(const char * buffer) {
std
::
memcpy
(
&
sparse_rate_
,
buffer
,
sizeof
(
sparse_rate_
));
buffer
+=
sizeof
(
sparse_rate_
);
if
(
bin_upper_bound_
!=
nullptr
)
{
delete
[]
bin_upper_bound_
;
}
bin_upper_bound_
=
new
float
[
num_bin_
];
std
::
memcpy
(
bin_upper_bound_
,
buffer
,
num_bin_
*
sizeof
(
float
));
bin_upper_bound_
=
new
double
[
num_bin_
];
std
::
memcpy
(
bin_upper_bound_
,
buffer
,
num_bin_
*
sizeof
(
double
));
}
void
BinMapper
::
SaveBinaryToFile
(
FILE
*
file
)
const
{
fwrite
(
&
num_bin_
,
sizeof
(
num_bin_
),
1
,
file
);
fwrite
(
&
is_trival_
,
sizeof
(
is_trival_
),
1
,
file
);
fwrite
(
&
sparse_rate_
,
sizeof
(
sparse_rate_
),
1
,
file
);
fwrite
(
bin_upper_bound_
,
sizeof
(
float
),
num_bin_
,
file
);
fwrite
(
bin_upper_bound_
,
sizeof
(
double
),
num_bin_
,
file
);
}
size_t
BinMapper
::
SizesInByte
()
const
{
return
sizeof
(
num_bin_
)
+
sizeof
(
is_trival_
)
+
sizeof
(
sparse_rate_
)
+
sizeof
(
float
)
*
num_bin_
;
return
sizeof
(
num_bin_
)
+
sizeof
(
is_trival_
)
+
sizeof
(
sparse_rate_
)
+
sizeof
(
double
)
*
num_bin_
;
}
template
class
DenseBin
<
uint8_t
>;
...
...
@@ -187,9 +187,9 @@ template class OrderedSparseBin<uint16_t>;
template
class
OrderedSparseBin
<
uint32_t
>;
Bin
*
Bin
::
CreateBin
(
data_size_t
num_data
,
int
num_bin
,
float
sparse_rate
,
bool
is_enable_sparse
,
bool
*
is_sparse
,
int
default_bin
)
{
Bin
*
Bin
::
CreateBin
(
data_size_t
num_data
,
int
num_bin
,
double
sparse_rate
,
bool
is_enable_sparse
,
bool
*
is_sparse
,
int
default_bin
)
{
// sparse threshold
const
float
kSparseThreshold
=
0.8
f
;
const
double
kSparseThreshold
=
0.8
f
;
if
(
sparse_rate
>=
kSparseThreshold
&&
is_enable_sparse
)
{
*
is_sparse
=
true
;
return
CreateSparseBin
(
num_data
,
num_bin
,
default_bin
);
...
...
src/io/config.cpp
View file @
3b50aeac
...
...
@@ -216,38 +216,38 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
void
ObjectiveConfig
::
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
{
GetBool
(
params
,
"is_unbalance"
,
&
is_unbalance
);
Get
Float
(
params
,
"sigmoid"
,
&
sigmoid
);
Get
Double
(
params
,
"sigmoid"
,
&
sigmoid
);
GetInt
(
params
,
"max_position"
,
&
max_position
);
CHECK
(
max_position
>
0
);
GetInt
(
params
,
"num_class"
,
&
num_class
);
CHECK
(
num_class
>=
1
);
std
::
string
tmp_str
=
""
;
if
(
GetString
(
params
,
"label_gain"
,
&
tmp_str
))
{
label_gain
=
Common
::
StringTo
Float
Array
(
tmp_str
,
','
);
label_gain
=
Common
::
StringTo
Double
Array
(
tmp_str
,
','
);
}
else
{
// label_gain = 2^i - 1, may overflow, so we use 31 here
const
int
max_label
=
31
;
label_gain
.
push_back
(
0.0
f
);
for
(
int
i
=
1
;
i
<
max_label
;
++
i
)
{
label_gain
.
push_back
(
static_cast
<
float
>
((
1
<<
i
)
-
1
));
label_gain
.
push_back
(
static_cast
<
double
>
((
1
<<
i
)
-
1
));
}
}
}
void
MetricConfig
::
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
{
Get
Float
(
params
,
"sigmoid"
,
&
sigmoid
);
Get
Double
(
params
,
"sigmoid"
,
&
sigmoid
);
GetInt
(
params
,
"num_class"
,
&
num_class
);
CHECK
(
num_class
>=
1
);
std
::
string
tmp_str
=
""
;
if
(
GetString
(
params
,
"label_gain"
,
&
tmp_str
))
{
label_gain
=
Common
::
StringTo
Float
Array
(
tmp_str
,
','
);
label_gain
=
Common
::
StringTo
Double
Array
(
tmp_str
,
','
);
}
else
{
// label_gain = 2^i - 1, may overflow, so we use 31 here
const
int
max_label
=
31
;
label_gain
.
push_back
(
0.0
f
);
for
(
int
i
=
1
;
i
<
max_label
;
++
i
)
{
label_gain
.
push_back
(
static_cast
<
float
>
((
1
<<
i
)
-
1
));
label_gain
.
push_back
(
static_cast
<
double
>
((
1
<<
i
)
-
1
));
}
}
if
(
GetString
(
params
,
"ndcg_eval_at"
,
&
tmp_str
))
{
...
...
@@ -267,14 +267,14 @@ void MetricConfig::Set(const std::unordered_map<std::string, std::string>& param
void
TreeConfig
::
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
{
GetInt
(
params
,
"min_data_in_leaf"
,
&
min_data_in_leaf
);
Get
Float
(
params
,
"min_sum_hessian_in_leaf"
,
&
min_sum_hessian_in_leaf
);
Get
Double
(
params
,
"min_sum_hessian_in_leaf"
,
&
min_sum_hessian_in_leaf
);
CHECK
(
min_sum_hessian_in_leaf
>
1.0
f
||
min_data_in_leaf
>
0
);
GetInt
(
params
,
"num_leaves"
,
&
num_leaves
);
CHECK
(
num_leaves
>
1
);
GetInt
(
params
,
"feature_fraction_seed"
,
&
feature_fraction_seed
);
Get
Float
(
params
,
"feature_fraction"
,
&
feature_fraction
);
Get
Double
(
params
,
"feature_fraction"
,
&
feature_fraction
);
CHECK
(
feature_fraction
>
0.0
f
&&
feature_fraction
<=
1.0
f
);
Get
Float
(
params
,
"histogram_pool_size"
,
&
histogram_pool_size
);
Get
Double
(
params
,
"histogram_pool_size"
,
&
histogram_pool_size
);
GetInt
(
params
,
"max_depth"
,
&
max_depth
);
CHECK
(
max_depth
>
1
||
max_depth
<
0
);
}
...
...
@@ -286,9 +286,9 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
GetInt
(
params
,
"bagging_seed"
,
&
bagging_seed
);
GetInt
(
params
,
"bagging_freq"
,
&
bagging_freq
);
CHECK
(
bagging_freq
>=
0
);
Get
Float
(
params
,
"bagging_fraction"
,
&
bagging_fraction
);
Get
Double
(
params
,
"bagging_fraction"
,
&
bagging_fraction
);
CHECK
(
bagging_fraction
>
0.0
f
&&
bagging_fraction
<=
1.0
f
);
Get
Float
(
params
,
"learning_rate"
,
&
learning_rate
);
Get
Double
(
params
,
"learning_rate"
,
&
learning_rate
);
CHECK
(
learning_rate
>
0.0
f
);
GetInt
(
params
,
"early_stopping_round"
,
&
early_stopping_round
);
CHECK
(
early_stopping_round
>=
0
);
...
...
src/io/dataset.cpp
View file @
3b50aeac
...
...
@@ -364,10 +364,10 @@ void Dataset::SetField(const char* field_name, const void* field_data, data_size
void
Dataset
::
ConstructBinMappers
(
int
rank
,
int
num_machines
,
const
std
::
vector
<
std
::
string
>&
sample_data
)
{
// sample_values[i][j], means the value of j-th sample on i-th feature
std
::
vector
<
std
::
vector
<
float
>>
sample_values
;
std
::
vector
<
std
::
vector
<
double
>>
sample_values
;
// temp buffer for one line features and label
std
::
vector
<
std
::
pair
<
int
,
float
>>
oneline_features
;
float
label
;
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
double
label
;
for
(
size_t
i
=
0
;
i
<
sample_data
.
size
();
++
i
)
{
oneline_features
.
clear
();
// parse features
...
...
@@ -376,7 +376,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
for
(
auto
&
feature_values
:
sample_values
)
{
feature_values
.
push_back
(
0.0
);
}
for
(
std
::
pair
<
int
,
float
>&
inner_data
:
oneline_features
)
{
for
(
std
::
pair
<
int
,
double
>&
inner_data
:
oneline_features
)
{
if
(
static_cast
<
size_t
>
(
inner_data
.
first
)
>=
sample_values
.
size
())
{
// if need expand feature set
size_t
need_size
=
inner_data
.
first
-
sample_values
.
size
()
+
1
;
...
...
@@ -571,6 +571,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
used_feature_map_
=
train_set
->
used_feature_map_
;
num_features_
=
static_cast
<
int
>
(
features_
.
size
());
num_total_features_
=
train_set
->
num_total_features_
;
feature_names_
=
train_set
->
feature_names_
;
// extract features
ExtractFeaturesFromMemory
();
}
else
{
...
...
@@ -585,6 +587,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
used_feature_map_
=
train_set
->
used_feature_map_
;
num_features_
=
static_cast
<
int
>
(
features_
.
size
());
num_total_features_
=
train_set
->
num_total_features_
;
feature_names_
=
train_set
->
feature_names_
;
// extract features
ExtractFeaturesFromFile
();
}
...
...
@@ -601,8 +605,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
void
Dataset
::
ExtractFeaturesFromMemory
()
{
std
::
vector
<
std
::
pair
<
int
,
float
>>
oneline_features
;
float
tmp_label
=
0.0
f
;
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
double
tmp_label
=
0.0
f
;
if
(
predict_fun_
==
nullptr
)
{
// if there is no need to predict with an initial model
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
...
...
@@ -612,7 +616,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// parser
parser_
->
ParseOneLine
(
text_reader_
->
Lines
()[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
// set label
metadata_
.
SetLabelAt
(
i
,
tmp_label
);
metadata_
.
SetLabelAt
(
i
,
static_cast
<
float
>
(
tmp_label
)
)
;
// free processed line:
text_reader_
->
Lines
()[
i
].
clear
();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
...
...
@@ -626,9 +630,9 @@ void Dataset::ExtractFeaturesFromMemory() {
}
else
{
if
(
inner_data
.
first
==
weight_idx_
)
{
metadata_
.
SetWeightAt
(
i
,
inner_data
.
second
);
metadata_
.
SetWeightAt
(
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
else
if
(
inner_data
.
first
==
group_idx_
)
{
metadata_
.
SetQueryAt
(
i
,
inner_data
.
second
);
metadata_
.
SetQueryAt
(
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
}
}
...
...
@@ -645,7 +649,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// set initial score
init_score
[
i
]
=
static_cast
<
float
>
(
predict_fun_
(
oneline_features
));
// set label
metadata_
.
SetLabelAt
(
i
,
tmp_label
);
metadata_
.
SetLabelAt
(
i
,
static_cast
<
float
>
(
tmp_label
)
)
;
// free processed line:
text_reader_
->
Lines
()[
i
].
clear
();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
...
...
@@ -659,9 +663,9 @@ void Dataset::ExtractFeaturesFromMemory() {
}
else
{
if
(
inner_data
.
first
==
weight_idx_
)
{
metadata_
.
SetWeightAt
(
i
,
inner_data
.
second
);
metadata_
.
SetWeightAt
(
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
else
if
(
inner_data
.
first
==
group_idx_
)
{
metadata_
.
SetQueryAt
(
i
,
inner_data
.
second
);
metadata_
.
SetQueryAt
(
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
}
}
...
...
@@ -688,8 +692,8 @@ void Dataset::ExtractFeaturesFromFile() {
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
[
this
,
&
init_score
]
(
data_size_t
start_idx
,
const
std
::
vector
<
std
::
string
>&
lines
)
{
std
::
vector
<
std
::
pair
<
int
,
float
>>
oneline_features
;
float
tmp_label
=
0.0
f
;
std
::
vector
<
std
::
pair
<
int
,
double
>>
oneline_features
;
double
tmp_label
=
0.0
f
;
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for
(
data_size_t
i
=
0
;
i
<
static_cast
<
data_size_t
>
(
lines
.
size
());
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
...
...
@@ -701,7 +705,7 @@ void Dataset::ExtractFeaturesFromFile() {
init_score
[
start_idx
+
i
]
=
static_cast
<
float
>
(
predict_fun_
(
oneline_features
));
}
// set label
metadata_
.
SetLabelAt
(
start_idx
+
i
,
tmp_label
);
metadata_
.
SetLabelAt
(
start_idx
+
i
,
static_cast
<
float
>
(
tmp_label
)
)
;
// push data
for
(
auto
&
inner_data
:
oneline_features
)
{
int
feature_idx
=
used_feature_map_
[
inner_data
.
first
];
...
...
@@ -711,9 +715,9 @@ void Dataset::ExtractFeaturesFromFile() {
}
else
{
if
(
inner_data
.
first
==
weight_idx_
)
{
metadata_
.
SetWeightAt
(
start_idx
+
i
,
inner_data
.
second
);
metadata_
.
SetWeightAt
(
start_idx
+
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
else
if
(
inner_data
.
first
==
group_idx_
)
{
metadata_
.
SetQueryAt
(
start_idx
+
i
,
inner_data
.
second
);
metadata_
.
SetQueryAt
(
start_idx
+
i
,
static_cast
<
float
>
(
inner_data
.
second
)
)
;
}
}
}
...
...
@@ -763,7 +767,11 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
// get size of header
size_t
size_of_header
=
sizeof
(
global_num_data_
)
+
sizeof
(
is_enable_sparse_
)
+
sizeof
(
max_bin_
)
+
sizeof
(
num_data_
)
+
sizeof
(
num_features_
)
+
sizeof
(
size_t
)
+
sizeof
(
int
)
*
used_feature_map_
.
size
();
+
sizeof
(
max_bin_
)
+
sizeof
(
num_data_
)
+
sizeof
(
num_features_
)
+
sizeof
(
num_total_features_
)
+
sizeof
(
size_t
)
+
sizeof
(
int
)
*
used_feature_map_
.
size
();
// size of feature names
for
(
int
i
=
0
;
i
<
num_total_features_
;
++
i
)
{
size_of_header
+=
feature_names_
[
i
].
size
()
+
sizeof
(
int
);
}
fwrite
(
&
size_of_header
,
sizeof
(
size_of_header
),
1
,
file
);
// write header
fwrite
(
&
global_num_data_
,
sizeof
(
global_num_data_
),
1
,
file
);
...
...
@@ -771,10 +779,19 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
fwrite
(
&
max_bin_
,
sizeof
(
max_bin_
),
1
,
file
);
fwrite
(
&
num_data_
,
sizeof
(
num_data_
),
1
,
file
);
fwrite
(
&
num_features_
,
sizeof
(
num_features_
),
1
,
file
);
fwrite
(
&
num_total_features_
,
sizeof
(
num_features_
),
1
,
file
);
size_t
num_used_feature_map
=
used_feature_map_
.
size
();
fwrite
(
&
num_used_feature_map
,
sizeof
(
num_used_feature_map
),
1
,
file
);
fwrite
(
used_feature_map_
.
data
(),
sizeof
(
int
),
num_used_feature_map
,
file
);
// write feature names
for
(
int
i
=
0
;
i
<
num_total_features_
;
++
i
)
{
int
str_len
=
static_cast
<
int
>
(
feature_names_
[
i
].
size
());
fwrite
(
&
str_len
,
sizeof
(
int
),
1
,
file
);
const
char
*
c_str
=
feature_names_
[
i
].
c_str
();
fwrite
(
c_str
,
sizeof
(
char
),
str_len
,
file
);
}
// get size of meta data
size_t
size_of_metadata
=
metadata_
.
SizesInByte
();
fwrite
(
&
size_of_metadata
,
sizeof
(
size_of_metadata
),
1
,
file
);
...
...
@@ -864,6 +881,8 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
mem_ptr
+=
sizeof
(
num_data_
);
num_features_
=
*
(
reinterpret_cast
<
const
int
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
num_features_
);
num_total_features_
=
*
(
reinterpret_cast
<
const
int
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
num_total_features_
);
size_t
num_used_feature_map
=
*
(
reinterpret_cast
<
const
size_t
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
num_used_feature_map
);
const
int
*
tmp_feature_map
=
reinterpret_cast
<
const
int
*>
(
mem_ptr
);
...
...
@@ -871,6 +890,21 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
for
(
size_t
i
=
0
;
i
<
num_used_feature_map
;
++
i
)
{
used_feature_map_
.
push_back
(
tmp_feature_map
[
i
]);
}
mem_ptr
+=
sizeof
(
int
)
*
num_used_feature_map
;
// get feature names
feature_names_
.
clear
();
// read feature names
for
(
int
i
=
0
;
i
<
num_total_features_
;
++
i
)
{
int
str_len
=
*
(
reinterpret_cast
<
const
int
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
int
);
std
::
stringstream
str_buf
;
for
(
int
j
=
0
;
j
<
str_len
;
++
j
)
{
char
tmp_char
=
*
(
reinterpret_cast
<
const
char
*>
(
mem_ptr
));
mem_ptr
+=
sizeof
(
char
);
str_buf
<<
tmp_char
;
}
feature_names_
.
emplace_back
(
str_buf
.
str
());
}
// read size of meta data
read_cnt
=
fread
(
buffer
,
sizeof
(
size_t
),
1
,
file
);
...
...
src/io/metadata.cpp
View file @
3b50aeac
...
...
@@ -281,9 +281,9 @@ void Metadata::LoadWeights() {
num_weights_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
weights_
=
new
float
[
num_weights_
];
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
float
tmp_weight
=
0.0
f
;
double
tmp_weight
=
0.0
f
;
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp_weight
);
weights_
[
i
]
=
tmp_weight
;
weights_
[
i
]
=
static_cast
<
float
>
(
tmp_weight
)
;
}
}
...
...
@@ -296,10 +296,10 @@ void Metadata::LoadInitialScore() {
Log
::
Info
(
"Start loading initial scores"
);
num_init_score_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
init_score_
=
new
float
[
num_init_score_
];
float
tmp
=
0.0
f
;
double
tmp
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
init_score_
[
i
]
=
tmp
;
init_score_
[
i
]
=
static_cast
<
float
>
(
tmp
)
;
}
}
...
...
src/io/parser.hpp
View file @
3b50aeac
...
...
@@ -18,9 +18,9 @@ public:
:
label_idx_
(
label_idx
)
{
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
int
idx
=
0
;
float
val
=
0.0
f
;
double
val
=
0.0
f
;
int
bias
=
0
;
*
out_label
=
0.0
f
;
while
(
*
str
!=
'\0'
)
{
...
...
@@ -50,9 +50,9 @@ public:
:
label_idx_
(
label_idx
)
{
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
int
idx
=
0
;
float
val
=
0.0
f
;
double
val
=
0.0
f
;
int
bias
=
0
;
while
(
*
str
!=
'\0'
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
...
...
@@ -83,9 +83,9 @@ public:
}
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
int
idx
=
0
;
float
val
=
0.0
f
;
double
val
=
0.0
f
;
if
(
label_idx_
==
0
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
*
out_label
=
val
;
...
...
src/io/tree.cpp
View file @
3b50aeac
...
...
@@ -23,11 +23,11 @@ Tree::Tree(int max_leaves)
split_feature_
=
new
int
[
max_leaves_
-
1
];
split_feature_real_
=
new
int
[
max_leaves_
-
1
];
threshold_in_bin_
=
new
unsigned
int
[
max_leaves_
-
1
];
threshold_
=
new
float
[
max_leaves_
-
1
];
split_gain_
=
new
float
[
max_leaves_
-
1
];
threshold_
=
new
double
[
max_leaves_
-
1
];
split_gain_
=
new
double
[
max_leaves_
-
1
];
leaf_parent_
=
new
int
[
max_leaves_
];
leaf_value_
=
new
float
[
max_leaves_
];
leaf_value_
=
new
double
[
max_leaves_
];
leaf_depth_
=
new
int
[
max_leaves_
];
// root is in the depth 1
leaf_depth_
[
0
]
=
1
;
...
...
@@ -48,7 +48,7 @@ Tree::~Tree() {
}
int
Tree
::
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold_bin
,
int
real_feature
,
float
threshold
,
float
left_value
,
float
right_value
,
float
gain
)
{
double
threshold
,
double
left_value
,
double
right_value
,
double
gain
)
{
int
new_node_idx
=
num_leaves_
-
1
;
// update parent info
int
parent
=
leaf_parent_
[
leaf
];
...
...
@@ -89,7 +89,7 @@ void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score
iterators
.
push_back
(
data
->
FeatureAt
(
i
)
->
bin_data
()
->
GetIterator
(
start
));
}
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
score
[
i
]
+=
leaf_value_
[
GetLeaf
(
iterators
,
i
)];
score
[
i
]
+=
static_cast
<
score_t
>
(
leaf_value_
[
GetLeaf
(
iterators
,
i
)]
)
;
}
});
}
...
...
@@ -103,7 +103,7 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat
iterators
.
push_back
(
data
->
FeatureAt
(
i
)
->
bin_data
()
->
GetIterator
(
used_data_indices
[
start
]));
}
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
score
[
used_data_indices
[
i
]]
+=
leaf_value_
[
GetLeaf
(
iterators
,
used_data_indices
[
i
])];
score
[
used_data_indices
[
i
]]
+=
static_cast
<
score_t
>
(
leaf_value_
[
GetLeaf
(
iterators
,
used_data_indices
[
i
])]
)
;
}
});
}
...
...
@@ -114,9 +114,9 @@ std::string Tree::ToString() {
ss
<<
"split_feature="
<<
Common
::
ArrayToString
<
int
>
(
split_feature_real_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"split_gain="
<<
Common
::
ArrayToString
<
float
>
(
split_gain_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
double
>
(
split_gain_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"threshold="
<<
Common
::
ArrayToString
<
float
>
(
threshold_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
double
>
(
threshold_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"left_child="
<<
Common
::
ArrayToString
<
int
>
(
left_child_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"right_child="
...
...
@@ -124,7 +124,7 @@ std::string Tree::ToString() {
ss
<<
"leaf_parent="
<<
Common
::
ArrayToString
<
int
>
(
leaf_parent_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
"leaf_value="
<<
Common
::
ArrayToString
<
float
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
double
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
std
::
endl
;
return
ss
.
str
();
}
...
...
@@ -154,10 +154,10 @@ Tree::Tree(const std::string& str) {
left_child_
=
new
int
[
num_leaves_
-
1
];
right_child_
=
new
int
[
num_leaves_
-
1
];
split_feature_real_
=
new
int
[
num_leaves_
-
1
];
threshold_
=
new
float
[
num_leaves_
-
1
];
split_gain_
=
new
float
[
num_leaves_
-
1
];
threshold_
=
new
double
[
num_leaves_
-
1
];
split_gain_
=
new
double
[
num_leaves_
-
1
];
leaf_parent_
=
new
int
[
num_leaves_
];
leaf_value_
=
new
float
[
num_leaves_
];
leaf_value_
=
new
double
[
num_leaves_
];
split_feature_
=
nullptr
;
threshold_in_bin_
=
nullptr
;
...
...
@@ -165,9 +165,9 @@ Tree::Tree(const std::string& str) {
Common
::
StringToIntArray
(
key_vals
[
"split_feature"
],
' '
,
num_leaves_
-
1
,
split_feature_real_
);
Common
::
StringTo
Float
Array
(
key_vals
[
"split_gain"
],
' '
,
Common
::
StringTo
Double
Array
(
key_vals
[
"split_gain"
],
' '
,
num_leaves_
-
1
,
split_gain_
);
Common
::
StringTo
Float
Array
(
key_vals
[
"threshold"
],
' '
,
Common
::
StringTo
Double
Array
(
key_vals
[
"threshold"
],
' '
,
num_leaves_
-
1
,
threshold_
);
Common
::
StringToIntArray
(
key_vals
[
"left_child"
],
' '
,
num_leaves_
-
1
,
left_child_
);
...
...
@@ -175,7 +175,7 @@ Tree::Tree(const std::string& str) {
num_leaves_
-
1
,
right_child_
);
Common
::
StringToIntArray
(
key_vals
[
"leaf_parent"
],
' '
,
num_leaves_
,
leaf_parent_
);
Common
::
StringTo
Float
Array
(
key_vals
[
"leaf_value"
],
' '
,
Common
::
StringTo
Double
Array
(
key_vals
[
"leaf_value"
],
' '
,
num_leaves_
,
leaf_value_
);
}
...
...
src/metric/binary_metric.hpp
View file @
3b50aeac
...
...
@@ -41,7 +41,7 @@ public:
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
}
else
{
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -58,8 +58,8 @@ public:
return
false
;
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
override
{
double
sum_loss
=
0.0
f
;
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -77,8 +77,8 @@ public:
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
)
*
weights_
[
i
];
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
double
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
double
>
(
1
,
loss
);
}
private:
...
...
@@ -89,7 +89,7 @@ private:
/*! \brief Pointer of weights */
const
float
*
weights_
;
/*! \brief Sum weights */
float
sum_weights_
;
double
sum_weights_
;
/*! \brief Name of test set */
std
::
string
name_
;
/*! \brief Sigmoid parameter */
...
...
@@ -172,7 +172,7 @@ public:
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
}
else
{
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -181,7 +181,7 @@ public:
}
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
override
{
// get indices sorted by score, descending order
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -189,13 +189,13 @@ public:
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
// temp sum of positive label
score_t
cur_pos
=
0.0
f
;
double
cur_pos
=
0.0
f
;
// total sum of positive label
score_t
sum_pos
=
0.0
f
;
double
sum_pos
=
0.0
f
;
// accumulated value for auc
score_t
accum
=
0.0
f
;
double
accum
=
0.0
f
;
// temp sum of negative label
score_t
cur_neg
=
0.0
f
;
double
cur_neg
=
0.0
f
;
score_t
threshold
=
score
[
sorted_idx
[
0
]];
if
(
weights_
==
nullptr
)
{
// no weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -233,11 +233,11 @@ public:
}
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
score_t
auc
=
1.0
f
;
double
auc
=
1.0
f
;
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
}
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
auc
)
)
;
return
std
::
vector
<
double
>
(
1
,
auc
);
}
private:
...
...
@@ -248,7 +248,7 @@ private:
/*! \brief Pointer of weights */
const
float
*
weights_
;
/*! \brief Sum weights */
float
sum_weights_
;
double
sum_weights_
;
/*! \brief Name of test set */
std
::
string
name_
;
};
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment