Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
516bd37a
Commit
516bd37a
authored
Nov 05, 2019
by
Truman, Wentao TIAN
Committed by
Guolin Ke
Nov 05, 2019
Browse files
renamed variables from bias to offset (#2539)
parent
bd7e1842
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
67 additions
and
67 deletions
+67
-67
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+11
-11
src/boosting/rf.hpp
src/boosting/rf.hpp
+5
-5
src/c_api.cpp
src/c_api.cpp
+8
-8
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+4
-4
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+4
-4
src/io/parser.hpp
src/io/parser.hpp
+6
-6
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+4
-4
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+2
-2
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+21
-21
src/treelearner/voting_parallel_tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
+2
-2
No files found.
src/boosting/gbdt.cpp
View file @
516bd37a
...
@@ -310,9 +310,9 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
...
@@ -310,9 +310,9 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
leaf_pred
[
i
]
=
tree_leaf_prediction
[
i
][
model_index
];
leaf_pred
[
i
]
=
tree_leaf_prediction
[
i
][
model_index
];
CHECK
(
leaf_pred
[
i
]
<
models_
[
model_index
]
->
num_leaves
());
CHECK
(
leaf_pred
[
i
]
<
models_
[
model_index
]
->
num_leaves
());
}
}
size_t
bias
=
static_cast
<
size_t
>
(
tree_id
)
*
num_data_
;
size_t
offset
=
static_cast
<
size_t
>
(
tree_id
)
*
num_data_
;
auto
grad
=
gradients_
.
data
()
+
bias
;
auto
grad
=
gradients_
.
data
()
+
offset
;
auto
hess
=
hessians_
.
data
()
+
bias
;
auto
hess
=
hessians_
.
data
()
+
offset
;
auto
new_tree
=
tree_learner_
->
FitByExistingTree
(
models_
[
model_index
].
get
(),
leaf_pred
,
grad
,
hess
);
auto
new_tree
=
tree_learner_
->
FitByExistingTree
(
models_
[
model_index
].
get
(),
leaf_pred
,
grad
,
hess
);
train_score_updater_
->
AddScore
(
tree_learner_
.
get
(),
new_tree
,
tree_id
);
train_score_updater_
->
AddScore
(
tree_learner_
.
get
(),
new_tree
,
tree_id
);
models_
[
model_index
].
reset
(
new_tree
);
models_
[
model_index
].
reset
(
new_tree
);
...
@@ -381,26 +381,26 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
...
@@ -381,26 +381,26 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
bool
should_continue
=
false
;
bool
should_continue
=
false
;
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
const
size_t
bias
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
const
size_t
offset
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
if
(
class_need_train_
[
cur_tree_id
]
&&
train_data_
->
num_features
()
>
0
)
{
if
(
class_need_train_
[
cur_tree_id
]
&&
train_data_
->
num_features
()
>
0
)
{
auto
grad
=
gradients
+
bias
;
auto
grad
=
gradients
+
offset
;
auto
hess
=
hessians
+
bias
;
auto
hess
=
hessians
+
offset
;
// need to copy gradients for bagging subset.
// need to copy gradients for bagging subset.
if
(
is_use_subset_
&&
bag_data_cnt_
<
num_data_
)
{
if
(
is_use_subset_
&&
bag_data_cnt_
<
num_data_
)
{
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
gradients_
[
bias
+
i
]
=
grad
[
bag_data_indices_
[
i
]];
gradients_
[
offset
+
i
]
=
grad
[
bag_data_indices_
[
i
]];
hessians_
[
bias
+
i
]
=
hess
[
bag_data_indices_
[
i
]];
hessians_
[
offset
+
i
]
=
hess
[
bag_data_indices_
[
i
]];
}
}
grad
=
gradients_
.
data
()
+
bias
;
grad
=
gradients_
.
data
()
+
offset
;
hess
=
hessians_
.
data
()
+
bias
;
hess
=
hessians_
.
data
()
+
offset
;
}
}
new_tree
.
reset
(
tree_learner_
->
Train
(
grad
,
hess
,
is_constant_hessian_
,
forced_splits_json_
));
new_tree
.
reset
(
tree_learner_
->
Train
(
grad
,
hess
,
is_constant_hessian_
,
forced_splits_json_
));
}
}
if
(
new_tree
->
num_leaves
()
>
1
)
{
if
(
new_tree
->
num_leaves
()
>
1
)
{
should_continue
=
true
;
should_continue
=
true
;
auto
score_ptr
=
train_score_updater_
->
score
()
+
bias
;
auto
score_ptr
=
train_score_updater_
->
score
()
+
offset
;
auto
residual_getter
=
[
score_ptr
](
const
label_t
*
label
,
int
i
)
{
return
static_cast
<
double
>
(
label
[
i
])
-
score_ptr
[
i
];
};
auto
residual_getter
=
[
score_ptr
](
const
label_t
*
label
,
int
i
)
{
return
static_cast
<
double
>
(
label
[
i
])
-
score_ptr
[
i
];
};
tree_learner_
->
RenewTreeOutput
(
new_tree
.
get
(),
objective_function_
,
residual_getter
,
tree_learner_
->
RenewTreeOutput
(
new_tree
.
get
(),
objective_function_
,
residual_getter
,
num_data_
,
bag_data_indices_
.
data
(),
bag_data_cnt_
);
num_data_
,
bag_data_indices_
.
data
(),
bag_data_cnt_
);
...
...
src/boosting/rf.hpp
View file @
516bd37a
...
@@ -91,9 +91,9 @@ class RF : public GBDT {
...
@@ -91,9 +91,9 @@ class RF : public GBDT {
std
::
vector
<
double
>
tmp_scores
(
total_size
,
0.0
f
);
std
::
vector
<
double
>
tmp_scores
(
total_size
,
0.0
f
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
j
=
0
;
j
<
num_tree_per_iteration_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_tree_per_iteration_
;
++
j
)
{
size_t
bias
=
static_cast
<
size_t
>
(
j
)
*
num_data_
;
size_t
offset
=
static_cast
<
size_t
>
(
j
)
*
num_data_
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
tmp_scores
[
bias
+
i
]
=
init_scores_
[
j
];
tmp_scores
[
offset
+
i
]
=
init_scores_
[
j
];
}
}
}
}
objective_function_
->
objective_function_
->
...
@@ -110,10 +110,10 @@ class RF : public GBDT {
...
@@ -110,10 +110,10 @@ class RF : public GBDT {
hessians
=
hessians_
.
data
();
hessians
=
hessians_
.
data
();
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
size_t
bias
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
size_t
offset
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
if
(
class_need_train_
[
cur_tree_id
])
{
if
(
class_need_train_
[
cur_tree_id
])
{
auto
grad
=
gradients
+
bias
;
auto
grad
=
gradients
+
offset
;
auto
hess
=
hessians
+
bias
;
auto
hess
=
hessians
+
offset
;
// need to copy gradients for bagging subset.
// need to copy gradients for bagging subset.
if
(
is_use_subset_
&&
bag_data_cnt_
<
num_data_
)
{
if
(
is_use_subset_
&&
bag_data_cnt_
<
num_data_
)
{
...
...
src/c_api.cpp
View file @
516bd37a
...
@@ -1764,8 +1764,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
...
@@ -1764,8 +1764,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
=
]
(
int
bias
)
{
return
[
=
]
(
int
offset
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
offset
);
if
(
i
>=
end
)
{
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
return
std
::
make_pair
(
-
1
,
0.0
);
}
}
...
@@ -1777,8 +1777,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
...
@@ -1777,8 +1777,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
=
]
(
int
bias
)
{
return
[
=
]
(
int
offset
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
offset
);
if
(
i
>=
end
)
{
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
return
std
::
make_pair
(
-
1
,
0.0
);
}
}
...
@@ -1793,8 +1793,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
...
@@ -1793,8 +1793,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
const
int32_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int32_t
*>
(
col_ptr
);
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
=
]
(
int
bias
)
{
return
[
=
]
(
int
offset
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
offset
);
if
(
i
>=
end
)
{
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
return
std
::
make_pair
(
-
1
,
0.0
);
}
}
...
@@ -1806,8 +1806,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
...
@@ -1806,8 +1806,8 @@ IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* ind
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
const
int64_t
*
ptr_col_ptr
=
reinterpret_cast
<
const
int64_t
*>
(
col_ptr
);
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
start
=
ptr_col_ptr
[
col_idx
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
int64_t
end
=
ptr_col_ptr
[
col_idx
+
1
];
return
[
=
]
(
int
bias
)
{
return
[
=
]
(
int
offset
)
{
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
bias
);
int64_t
i
=
static_cast
<
int64_t
>
(
start
+
offset
);
if
(
i
>=
end
)
{
if
(
i
>=
end
)
{
return
std
::
make_pair
(
-
1
,
0.0
);
return
std
::
make_pair
(
-
1
,
0.0
);
}
}
...
...
src/io/dense_bin.hpp
View file @
516bd37a
...
@@ -24,9 +24,9 @@ class DenseBinIterator: public BinIterator {
...
@@ -24,9 +24,9 @@ class DenseBinIterator: public BinIterator {
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
default_bin_
(
static_cast
<
VAL_T
>
(
default_bin
))
{
default_bin_
(
static_cast
<
VAL_T
>
(
default_bin
))
{
if
(
default_bin_
==
0
)
{
if
(
default_bin_
==
0
)
{
bias
_
=
1
;
offset
_
=
1
;
}
else
{
}
else
{
bias
_
=
0
;
offset
_
=
0
;
}
}
}
}
inline
uint32_t
RawGet
(
data_size_t
idx
)
override
;
inline
uint32_t
RawGet
(
data_size_t
idx
)
override
;
...
@@ -38,7 +38,7 @@ class DenseBinIterator: public BinIterator {
...
@@ -38,7 +38,7 @@ class DenseBinIterator: public BinIterator {
VAL_T
min_bin_
;
VAL_T
min_bin_
;
VAL_T
max_bin_
;
VAL_T
max_bin_
;
VAL_T
default_bin_
;
VAL_T
default_bin_
;
uint8_t
bias
_
;
uint8_t
offset
_
;
};
};
/*!
/*!
* \brief Used to store bins for dense feature
* \brief Used to store bins for dense feature
...
@@ -334,7 +334,7 @@ template <typename VAL_T>
...
@@ -334,7 +334,7 @@ template <typename VAL_T>
uint32_t
DenseBinIterator
<
VAL_T
>::
Get
(
data_size_t
idx
)
{
uint32_t
DenseBinIterator
<
VAL_T
>::
Get
(
data_size_t
idx
)
{
auto
ret
=
bin_data_
->
data_
[
idx
];
auto
ret
=
bin_data_
->
data_
[
idx
];
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
return
ret
-
min_bin_
+
bias
_
;
return
ret
-
min_bin_
+
offset
_
;
}
else
{
}
else
{
return
default_bin_
;
return
default_bin_
;
}
}
...
...
src/io/dense_nbits_bin.hpp
View file @
516bd37a
...
@@ -22,9 +22,9 @@ class Dense4bitsBinIterator : public BinIterator {
...
@@ -22,9 +22,9 @@ class Dense4bitsBinIterator : public BinIterator {
max_bin_
(
static_cast
<
uint8_t
>
(
max_bin
)),
max_bin_
(
static_cast
<
uint8_t
>
(
max_bin
)),
default_bin_
(
static_cast
<
uint8_t
>
(
default_bin
))
{
default_bin_
(
static_cast
<
uint8_t
>
(
default_bin
))
{
if
(
default_bin_
==
0
)
{
if
(
default_bin_
==
0
)
{
bias
_
=
1
;
offset
_
=
1
;
}
else
{
}
else
{
bias
_
=
0
;
offset
_
=
0
;
}
}
}
}
inline
uint32_t
RawGet
(
data_size_t
idx
)
override
;
inline
uint32_t
RawGet
(
data_size_t
idx
)
override
;
...
@@ -36,7 +36,7 @@ class Dense4bitsBinIterator : public BinIterator {
...
@@ -36,7 +36,7 @@ class Dense4bitsBinIterator : public BinIterator {
uint8_t
min_bin_
;
uint8_t
min_bin_
;
uint8_t
max_bin_
;
uint8_t
max_bin_
;
uint8_t
default_bin_
;
uint8_t
default_bin_
;
uint8_t
bias
_
;
uint8_t
offset
_
;
};
};
class
Dense4bitsBin
:
public
Bin
{
class
Dense4bitsBin
:
public
Bin
{
...
@@ -383,7 +383,7 @@ class Dense4bitsBin : public Bin {
...
@@ -383,7 +383,7 @@ class Dense4bitsBin : public Bin {
uint32_t
Dense4bitsBinIterator
::
Get
(
data_size_t
idx
)
{
uint32_t
Dense4bitsBinIterator
::
Get
(
data_size_t
idx
)
{
const
auto
bin
=
(
bin_data_
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin
=
(
bin_data_
->
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
if
(
bin
>=
min_bin_
&&
bin
<=
max_bin_
)
{
if
(
bin
>=
min_bin_
&&
bin
<=
max_bin_
)
{
return
bin
-
min_bin_
+
bias
_
;
return
bin
-
min_bin_
+
offset
_
;
}
else
{
}
else
{
return
default_bin_
;
return
default_bin_
;
}
}
...
...
src/io/parser.hpp
View file @
516bd37a
...
@@ -24,15 +24,15 @@ class CSVParser: public Parser {
...
@@ -24,15 +24,15 @@ class CSVParser: public Parser {
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
int
idx
=
0
;
int
idx
=
0
;
double
val
=
0.0
f
;
double
val
=
0.0
f
;
int
bias
=
0
;
int
offset
=
0
;
*
out_label
=
0.0
f
;
*
out_label
=
0.0
f
;
while
(
*
str
!=
'\0'
)
{
while
(
*
str
!=
'\0'
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
str
=
Common
::
Atof
(
str
,
&
val
);
if
(
idx
==
label_idx_
)
{
if
(
idx
==
label_idx_
)
{
*
out_label
=
val
;
*
out_label
=
val
;
bias
=
-
1
;
offset
=
-
1
;
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
out_features
->
emplace_back
(
idx
+
bias
,
val
);
out_features
->
emplace_back
(
idx
+
offset
,
val
);
}
}
++
idx
;
++
idx
;
if
(
*
str
==
','
)
{
if
(
*
str
==
','
)
{
...
@@ -61,14 +61,14 @@ class TSVParser: public Parser {
...
@@ -61,14 +61,14 @@ class TSVParser: public Parser {
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
int
idx
=
0
;
int
idx
=
0
;
double
val
=
0.0
f
;
double
val
=
0.0
f
;
int
bias
=
0
;
int
offset
=
0
;
while
(
*
str
!=
'\0'
)
{
while
(
*
str
!=
'\0'
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
str
=
Common
::
Atof
(
str
,
&
val
);
if
(
idx
==
label_idx_
)
{
if
(
idx
==
label_idx_
)
{
*
out_label
=
val
;
*
out_label
=
val
;
bias
=
-
1
;
offset
=
-
1
;
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
}
else
if
(
std
::
fabs
(
val
)
>
kZeroThreshold
||
std
::
isnan
(
val
))
{
out_features
->
emplace_back
(
idx
+
bias
,
val
);
out_features
->
emplace_back
(
idx
+
offset
,
val
);
}
}
++
idx
;
++
idx
;
if
(
*
str
==
'\t'
)
{
if
(
*
str
==
'\t'
)
{
...
...
src/io/sparse_bin.hpp
View file @
516bd37a
...
@@ -31,9 +31,9 @@ class SparseBinIterator: public BinIterator {
...
@@ -31,9 +31,9 @@ class SparseBinIterator: public BinIterator {
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
max_bin_
(
static_cast
<
VAL_T
>
(
max_bin
)),
default_bin_
(
static_cast
<
VAL_T
>
(
default_bin
))
{
default_bin_
(
static_cast
<
VAL_T
>
(
default_bin
))
{
if
(
default_bin_
==
0
)
{
if
(
default_bin_
==
0
)
{
bias
_
=
1
;
offset
_
=
1
;
}
else
{
}
else
{
bias
_
=
0
;
offset
_
=
0
;
}
}
Reset
(
0
);
Reset
(
0
);
}
}
...
@@ -48,7 +48,7 @@ class SparseBinIterator: public BinIterator {
...
@@ -48,7 +48,7 @@ class SparseBinIterator: public BinIterator {
inline
uint32_t
Get
(
data_size_t
idx
)
override
{
inline
uint32_t
Get
(
data_size_t
idx
)
override
{
VAL_T
ret
=
InnerRawGet
(
idx
);
VAL_T
ret
=
InnerRawGet
(
idx
);
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
if
(
ret
>=
min_bin_
&&
ret
<=
max_bin_
)
{
return
ret
-
min_bin_
+
bias
_
;
return
ret
-
min_bin_
+
offset
_
;
}
else
{
}
else
{
return
default_bin_
;
return
default_bin_
;
}
}
...
@@ -63,7 +63,7 @@ class SparseBinIterator: public BinIterator {
...
@@ -63,7 +63,7 @@ class SparseBinIterator: public BinIterator {
VAL_T
min_bin_
;
VAL_T
min_bin_
;
VAL_T
max_bin_
;
VAL_T
max_bin_
;
VAL_T
default_bin_
;
VAL_T
default_bin_
;
uint8_t
bias
_
;
uint8_t
offset
_
;
};
};
template
<
typename
VAL_T
>
template
<
typename
VAL_T
>
...
...
src/objective/multiclass_objective.hpp
View file @
516bd37a
...
@@ -221,8 +221,8 @@ class MulticlassOVA: public ObjectiveFunction {
...
@@ -221,8 +221,8 @@ class MulticlassOVA: public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
for
(
int
i
=
0
;
i
<
num_class_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_class_
;
++
i
)
{
int64_t
bias
=
static_cast
<
int64_t
>
(
num_data_
)
*
i
;
int64_t
offset
=
static_cast
<
int64_t
>
(
num_data_
)
*
i
;
binary_loss_
[
i
]
->
GetGradients
(
score
+
bias
,
gradients
+
bias
,
hessians
+
bias
);
binary_loss_
[
i
]
->
GetGradients
(
score
+
offset
,
gradients
+
offset
,
hessians
+
offset
);
}
}
}
}
...
...
src/treelearner/feature_histogram.hpp
View file @
516bd37a
...
@@ -23,7 +23,7 @@ class FeatureMetainfo {
...
@@ -23,7 +23,7 @@ class FeatureMetainfo {
public:
public:
int
num_bin
;
int
num_bin
;
MissingType
missing_type
;
MissingType
missing_type
;
int8_t
bias
=
0
;
int8_t
offset
=
0
;
uint32_t
default_bin
;
uint32_t
default_bin
;
int8_t
monotone_type
;
int8_t
monotone_type
;
double
penalty
;
double
penalty
;
...
@@ -73,7 +73,7 @@ class FeatureHistogram {
...
@@ -73,7 +73,7 @@ class FeatureHistogram {
* \param other The histogram that want to subtract
* \param other The histogram that want to subtract
*/
*/
void
Subtract
(
const
FeatureHistogram
&
other
)
{
void
Subtract
(
const
FeatureHistogram
&
other
)
{
for
(
int
i
=
0
;
i
<
meta_
->
num_bin
-
meta_
->
bias
;
++
i
)
{
for
(
int
i
=
0
;
i
<
meta_
->
num_bin
-
meta_
->
offset
;
++
i
)
{
data_
[
i
].
cnt
-=
other
.
data_
[
i
].
cnt
;
data_
[
i
].
cnt
-=
other
.
data_
[
i
].
cnt
;
data_
[
i
].
sum_gradients
-=
other
.
data_
[
i
].
sum_gradients
;
data_
[
i
].
sum_gradients
-=
other
.
data_
[
i
].
sum_gradients
;
data_
[
i
].
sum_hessians
-=
other
.
data_
[
i
].
sum_hessians
;
data_
[
i
].
sum_hessians
-=
other
.
data_
[
i
].
sum_hessians
;
...
@@ -298,7 +298,7 @@ class FeatureHistogram {
...
@@ -298,7 +298,7 @@ class FeatureHistogram {
double
min_gain_shift
=
gain_shift
+
meta_
->
config
->
min_gain_to_split
;
double
min_gain_shift
=
gain_shift
+
meta_
->
config
->
min_gain_to_split
;
// do stuff here
// do stuff here
const
int8_t
bias
=
meta_
->
bias
;
const
int8_t
offset
=
meta_
->
offset
;
double
sum_right_gradient
=
0.0
f
;
double
sum_right_gradient
=
0.0
f
;
double
sum_right_hessian
=
kEpsilon
;
double
sum_right_hessian
=
kEpsilon
;
...
@@ -313,15 +313,15 @@ class FeatureHistogram {
...
@@ -313,15 +313,15 @@ class FeatureHistogram {
use_na_as_missing
=
true
;
use_na_as_missing
=
true
;
}
}
int
t
=
meta_
->
num_bin
-
1
-
bias
-
use_na_as_missing
;
int
t
=
meta_
->
num_bin
-
1
-
offset
-
use_na_as_missing
;
const
int
t_end
=
1
-
bias
;
const
int
t_end
=
1
-
offset
;
// from right to left, and we don't need data in bin0
// from right to left, and we don't need data in bin0
for
(;
t
>=
t_end
;
--
t
)
{
for
(;
t
>=
t_end
;
--
t
)
{
if
(
static_cast
<
uint32_t
>
(
t
+
bias
)
<
threshold
)
{
break
;
}
if
(
static_cast
<
uint32_t
>
(
t
+
offset
)
<
threshold
)
{
break
;
}
// need to skip default bin
// need to skip default bin
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
skip_default_bin
&&
(
t
+
offset
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
sum_right_gradient
+=
data_
[
t
].
sum_gradients
;
sum_right_gradient
+=
data_
[
t
].
sum_gradients
;
sum_right_hessian
+=
data_
[
t
].
sum_hessians
;
sum_right_hessian
+=
data_
[
t
].
sum_hessians
;
...
@@ -423,14 +423,14 @@ class FeatureHistogram {
...
@@ -423,14 +423,14 @@ class FeatureHistogram {
* \brief Binary size of this histogram
* \brief Binary size of this histogram
*/
*/
int
SizeOfHistgram
()
const
{
int
SizeOfHistgram
()
const
{
return
(
meta_
->
num_bin
-
meta_
->
bias
)
*
sizeof
(
HistogramBinEntry
);
return
(
meta_
->
num_bin
-
meta_
->
offset
)
*
sizeof
(
HistogramBinEntry
);
}
}
/*!
/*!
* \brief Restore histogram from memory
* \brief Restore histogram from memory
*/
*/
void
FromMemory
(
char
*
memory_data
)
{
void
FromMemory
(
char
*
memory_data
)
{
std
::
memcpy
(
data_
,
memory_data
,
(
meta_
->
num_bin
-
meta_
->
bias
)
*
sizeof
(
HistogramBinEntry
));
std
::
memcpy
(
data_
,
memory_data
,
(
meta_
->
num_bin
-
meta_
->
offset
)
*
sizeof
(
HistogramBinEntry
));
}
}
/*!
/*!
...
@@ -507,7 +507,7 @@ class FeatureHistogram {
...
@@ -507,7 +507,7 @@ class FeatureHistogram {
void
FindBestThresholdSequence
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
void
FindBestThresholdSequence
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
double
min_constraint
,
double
max_constraint
,
double
min_gain_shift
,
SplitInfo
*
output
,
int
dir
,
bool
skip_default_bin
,
bool
use_na_as_missing
)
{
double
min_gain_shift
,
SplitInfo
*
output
,
int
dir
,
bool
skip_default_bin
,
bool
use_na_as_missing
)
{
const
int8_t
bias
=
meta_
->
bias
;
const
int8_t
offset
=
meta_
->
offset
;
double
best_sum_left_gradient
=
NAN
;
double
best_sum_left_gradient
=
NAN
;
double
best_sum_left_hessian
=
NAN
;
double
best_sum_left_hessian
=
NAN
;
...
@@ -520,13 +520,13 @@ class FeatureHistogram {
...
@@ -520,13 +520,13 @@ class FeatureHistogram {
double
sum_right_hessian
=
kEpsilon
;
double
sum_right_hessian
=
kEpsilon
;
data_size_t
right_count
=
0
;
data_size_t
right_count
=
0
;
int
t
=
meta_
->
num_bin
-
1
-
bias
-
use_na_as_missing
;
int
t
=
meta_
->
num_bin
-
1
-
offset
-
use_na_as_missing
;
const
int
t_end
=
1
-
bias
;
const
int
t_end
=
1
-
offset
;
// from right to left, and we don't need data in bin0
// from right to left, and we don't need data in bin0
for
(;
t
>=
t_end
;
--
t
)
{
for
(;
t
>=
t_end
;
--
t
)
{
// need to skip default bin
// need to skip default bin
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
skip_default_bin
&&
(
t
+
offset
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
sum_right_gradient
+=
data_
[
t
].
sum_gradients
;
sum_right_gradient
+=
data_
[
t
].
sum_gradients
;
sum_right_hessian
+=
data_
[
t
].
sum_hessians
;
sum_right_hessian
+=
data_
[
t
].
sum_hessians
;
...
@@ -558,7 +558,7 @@ class FeatureHistogram {
...
@@ -558,7 +558,7 @@ class FeatureHistogram {
best_sum_left_gradient
=
sum_left_gradient
;
best_sum_left_gradient
=
sum_left_gradient
;
best_sum_left_hessian
=
sum_left_hessian
;
best_sum_left_hessian
=
sum_left_hessian
;
// left is <= threshold, right is > threshold. so this is t-1
// left is <= threshold, right is > threshold. so this is t-1
best_threshold
=
static_cast
<
uint32_t
>
(
t
-
1
+
bias
);
best_threshold
=
static_cast
<
uint32_t
>
(
t
-
1
+
offset
);
best_gain
=
current_gain
;
best_gain
=
current_gain
;
}
}
}
}
...
@@ -568,13 +568,13 @@ class FeatureHistogram {
...
@@ -568,13 +568,13 @@ class FeatureHistogram {
data_size_t
left_count
=
0
;
data_size_t
left_count
=
0
;
int
t
=
0
;
int
t
=
0
;
const
int
t_end
=
meta_
->
num_bin
-
2
-
bias
;
const
int
t_end
=
meta_
->
num_bin
-
2
-
offset
;
if
(
use_na_as_missing
&&
bias
==
1
)
{
if
(
use_na_as_missing
&&
offset
==
1
)
{
sum_left_gradient
=
sum_gradient
;
sum_left_gradient
=
sum_gradient
;
sum_left_hessian
=
sum_hessian
-
kEpsilon
;
sum_left_hessian
=
sum_hessian
-
kEpsilon
;
left_count
=
num_data
;
left_count
=
num_data
;
for
(
int
i
=
0
;
i
<
meta_
->
num_bin
-
bias
;
++
i
)
{
for
(
int
i
=
0
;
i
<
meta_
->
num_bin
-
offset
;
++
i
)
{
sum_left_gradient
-=
data_
[
i
].
sum_gradients
;
sum_left_gradient
-=
data_
[
i
].
sum_gradients
;
sum_left_hessian
-=
data_
[
i
].
sum_hessians
;
sum_left_hessian
-=
data_
[
i
].
sum_hessians
;
left_count
-=
data_
[
i
].
cnt
;
left_count
-=
data_
[
i
].
cnt
;
...
@@ -584,7 +584,7 @@ class FeatureHistogram {
...
@@ -584,7 +584,7 @@ class FeatureHistogram {
for
(;
t
<=
t_end
;
++
t
)
{
for
(;
t
<=
t_end
;
++
t
)
{
// need to skip default bin
// need to skip default bin
if
(
skip_default_bin
&&
(
t
+
bias
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
skip_default_bin
&&
(
t
+
offset
)
==
static_cast
<
int
>
(
meta_
->
default_bin
))
{
continue
;
}
if
(
t
>=
0
)
{
if
(
t
>=
0
)
{
sum_left_gradient
+=
data_
[
t
].
sum_gradients
;
sum_left_gradient
+=
data_
[
t
].
sum_gradients
;
sum_left_hessian
+=
data_
[
t
].
sum_hessians
;
sum_left_hessian
+=
data_
[
t
].
sum_hessians
;
...
@@ -616,7 +616,7 @@ class FeatureHistogram {
...
@@ -616,7 +616,7 @@ class FeatureHistogram {
best_left_count
=
left_count
;
best_left_count
=
left_count
;
best_sum_left_gradient
=
sum_left_gradient
;
best_sum_left_gradient
=
sum_left_gradient
;
best_sum_left_hessian
=
sum_left_hessian
;
best_sum_left_hessian
=
sum_left_hessian
;
best_threshold
=
static_cast
<
uint32_t
>
(
t
+
bias
);
best_threshold
=
static_cast
<
uint32_t
>
(
t
+
offset
);
best_gain
=
current_gain
;
best_gain
=
current_gain
;
}
}
}
}
...
@@ -711,9 +711,9 @@ class HistogramPool {
...
@@ -711,9 +711,9 @@ class HistogramPool {
feature_metas_
[
i
].
monotone_type
=
train_data
->
FeatureMonotone
(
i
);
feature_metas_
[
i
].
monotone_type
=
train_data
->
FeatureMonotone
(
i
);
feature_metas_
[
i
].
penalty
=
train_data
->
FeaturePenalte
(
i
);
feature_metas_
[
i
].
penalty
=
train_data
->
FeaturePenalte
(
i
);
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
feature_metas_
[
i
].
bias
=
1
;
feature_metas_
[
i
].
offset
=
1
;
}
else
{
}
else
{
feature_metas_
[
i
].
bias
=
0
;
feature_metas_
[
i
].
offset
=
0
;
}
}
feature_metas_
[
i
].
config
=
config
;
feature_metas_
[
i
].
config
=
config
;
feature_metas_
[
i
].
bin_type
=
train_data
->
FeatureBinMapper
(
i
)
->
bin_type
();
feature_metas_
[
i
].
bin_type
=
train_data
->
FeatureBinMapper
(
i
)
->
bin_type
();
...
...
src/treelearner/voting_parallel_tree_learner.cpp
View file @
516bd37a
...
@@ -75,9 +75,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
...
@@ -75,9 +75,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
feature_metas_
[
i
].
monotone_type
=
train_data
->
FeatureMonotone
(
i
);
feature_metas_
[
i
].
monotone_type
=
train_data
->
FeatureMonotone
(
i
);
feature_metas_
[
i
].
penalty
=
train_data
->
FeaturePenalte
(
i
);
feature_metas_
[
i
].
penalty
=
train_data
->
FeaturePenalte
(
i
);
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
feature_metas_
[
i
].
bias
=
1
;
feature_metas_
[
i
].
offset
=
1
;
}
else
{
}
else
{
feature_metas_
[
i
].
bias
=
0
;
feature_metas_
[
i
].
offset
=
0
;
}
}
feature_metas_
[
i
].
config
=
this
->
config_
;
feature_metas_
[
i
].
config
=
this
->
config_
;
feature_metas_
[
i
].
bin_type
=
train_data
->
FeatureBinMapper
(
i
)
->
bin_type
();
feature_metas_
[
i
].
bin_type
=
train_data
->
FeatureBinMapper
(
i
)
->
bin_type
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment