Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
66b7f032
Commit
66b7f032
authored
Apr 17, 2017
by
Guolin Ke
Browse files
reduce branching in histogram sum-up.
parent
062bfa79
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
552 additions
and
455 deletions
+552
-455
.travis.yml
.travis.yml
+3
-3
CMakeLists.txt
CMakeLists.txt
+1
-1
include/LightGBM/bin.h
include/LightGBM/bin.h
+7
-0
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+3
-2
src/boosting/goss.hpp
src/boosting/goss.hpp
+3
-3
src/c_api.cpp
src/c_api.cpp
+138
-138
src/io/dataset.cpp
src/io/dataset.cpp
+148
-70
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+112
-110
src/io/dense_nbits_bin.hpp
src/io/dense_nbits_bin.hpp
+125
-128
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+12
-0
No files found.
.travis.yml
View file @
66b7f032
...
@@ -36,19 +36,19 @@ install:
...
@@ -36,19 +36,19 @@ install:
script
:
script
:
-
cd $TRAVIS_BUILD_DIR
-
cd $TRAVIS_BUILD_DIR
-
mkdir build && cd build && cmake .. && make
-j
-
mkdir build && cd build && cmake .. && make
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
-
cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 --exclude=./compute .
-
cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 --exclude=./compute .
-
rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make
-j
-
rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
-
cd $TRAVIS_BUILD_DIR
-
cd $TRAVIS_BUILD_DIR
-
rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=ON -DBOOST_ROOT="$HOME/miniconda/" -DOpenCL_INCLUDE_DIR=$AMDAPPSDK/include/ ..
-
rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=ON -DBOOST_ROOT="$HOME/miniconda/" -DOpenCL_INCLUDE_DIR=$AMDAPPSDK/include/ ..
-
sed -i 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ../include/LightGBM/config.h
-
sed -i 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ../include/LightGBM/config.h
-
make
-j$(nproc)
-
make
-
sed -i 's/std::string device_type = "gpu";/std::string device_type = "cpu";/' ../include/LightGBM/config.h
-
sed -i 's/std::string device_type = "gpu";/std::string device_type = "cpu";/' ../include/LightGBM/config.h
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
...
...
CMakeLists.txt
View file @
66b7f032
...
@@ -47,7 +47,7 @@ if(USE_GPU)
...
@@ -47,7 +47,7 @@ if(USE_GPU)
endif
(
USE_GPU
)
endif
(
USE_GPU
)
if
(
UNIX OR MINGW OR CYGWIN
)
if
(
UNIX OR MINGW OR CYGWIN
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes"
)
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes
-march=core2 -mtune=native
"
)
endif
()
endif
()
if
(
MSVC
)
if
(
MSVC
)
...
...
include/LightGBM/bin.h
View file @
66b7f032
...
@@ -333,6 +333,10 @@ public:
...
@@ -333,6 +333,10 @@ public:
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
HistogramBinEntry
*
out
)
const
=
0
;
HistogramBinEntry
*
out
)
const
=
0
;
virtual
void
ConstructHistogram
(
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
HistogramBinEntry
*
out
)
const
=
0
;
/*!
/*!
* \brief Construct histogram of this feature,
* \brief Construct histogram of this feature,
* Note: We use ordered_gradients and ordered_hessians to improve cache hit chance
* Note: We use ordered_gradients and ordered_hessians to improve cache hit chance
...
@@ -348,6 +352,9 @@ public:
...
@@ -348,6 +352,9 @@ public:
virtual
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
virtual
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
HistogramBinEntry
*
out
)
const
=
0
;
const
score_t
*
ordered_gradients
,
HistogramBinEntry
*
out
)
const
=
0
;
virtual
void
ConstructHistogram
(
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
HistogramBinEntry
*
out
)
const
=
0
;
/*!
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param min_bin min_bin of current used feature
...
...
src/boosting/gbdt.cpp
View file @
66b7f032
...
@@ -384,7 +384,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -384,7 +384,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
}
// get sub gradients
// get sub gradients
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
auto
bias
=
cur_tree_id
*
num_data_
;
size_t
bias
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
// cannot multi-threading here.
// cannot multi-threading here.
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
bag_data_cnt_
;
++
i
)
{
gradients_
[
bias
+
i
]
=
gradient
[
bias
+
bag_data_indices_
[
i
]];
gradients_
[
bias
+
i
]
=
gradient
[
bias
+
bag_data_indices_
[
i
]];
...
@@ -404,8 +404,9 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -404,8 +404,9 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
#endif
#endif
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
std
::
unique_ptr
<
Tree
>
new_tree
(
new
Tree
(
2
));
if
(
class_need_train_
[
cur_tree_id
])
{
if
(
class_need_train_
[
cur_tree_id
])
{
size_t
bias
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
;
new_tree
.
reset
(
new_tree
.
reset
(
tree_learner_
->
Train
(
gradient
+
cur_tree_id
*
num_data_
,
hessian
+
cur_tree_id
*
num_data_
,
is_constant_hessian_
));
tree_learner_
->
Train
(
gradient
+
bias
,
hessian
+
bias
,
is_constant_hessian_
));
}
}
#ifdef TIMETAG
#ifdef TIMETAG
tree_time
+=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
tree_time
+=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
...
...
src/boosting/goss.hpp
View file @
66b7f032
...
@@ -80,7 +80,7 @@ public:
...
@@ -80,7 +80,7 @@ public:
std
::
vector
<
score_t
>
tmp_gradients
(
cnt
,
0.0
f
);
std
::
vector
<
score_t
>
tmp_gradients
(
cnt
,
0.0
f
);
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
in
t
idx
=
cur_tree_id
*
num_data_
+
start
+
i
;
size_
t
idx
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
+
start
+
i
;
tmp_gradients
[
i
]
+=
std
::
fabs
(
gradients_
[
idx
]
*
hessians_
[
idx
]);
tmp_gradients
[
i
]
+=
std
::
fabs
(
gradients_
[
idx
]
*
hessians_
[
idx
]);
}
}
}
}
...
@@ -97,7 +97,7 @@ public:
...
@@ -97,7 +97,7 @@ public:
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
score_t
grad
=
0.0
f
;
score_t
grad
=
0.0
f
;
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
in
t
idx
=
cur_tree_id
*
num_data_
+
start
+
i
;
size_
t
idx
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
+
start
+
i
;
grad
+=
std
::
fabs
(
gradients_
[
idx
]
*
hessians_
[
idx
]);
grad
+=
std
::
fabs
(
gradients_
[
idx
]
*
hessians_
[
idx
]);
}
}
if
(
grad
>=
threshold
)
{
if
(
grad
>=
threshold
)
{
...
@@ -111,7 +111,7 @@ public:
...
@@ -111,7 +111,7 @@ public:
if
(
cur_rand
.
NextFloat
()
<
prob
)
{
if
(
cur_rand
.
NextFloat
()
<
prob
)
{
buffer
[
cur_left_cnt
++
]
=
start
+
i
;
buffer
[
cur_left_cnt
++
]
=
start
+
i
;
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
for
(
int
cur_tree_id
=
0
;
cur_tree_id
<
num_tree_per_iteration_
;
++
cur_tree_id
)
{
in
t
idx
=
cur_tree_id
*
num_data_
+
start
+
i
;
size_
t
idx
=
static_cast
<
size_t
>
(
cur_tree_id
)
*
num_data_
+
start
+
i
;
gradients_
[
idx
]
*=
multiply
;
gradients_
[
idx
]
*=
multiply
;
hessians_
[
idx
]
*=
multiply
;
hessians_
[
idx
]
*=
multiply
;
}
}
...
...
src/c_api.cpp
View file @
66b7f032
This diff is collapsed.
Click to expand it.
src/io/dataset.cpp
View file @
66b7f032
...
@@ -435,85 +435,163 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
...
@@ -435,85 +435,163 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
}
}
ptr_ordered_grad
=
ordered_gradients
;
ptr_ordered_grad
=
ordered_gradients
;
ptr_ordered_hess
=
ordered_hessians
;
ptr_ordered_hess
=
ordered_hessians
;
}
if
(
!
is_constant_hessian
)
{
if
(
!
is_constant_hessian
)
{
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
bool
is_groud_used
=
false
;
bool
is_groud_used
=
false
;
const
int
f_cnt
=
group_feature_cnt_
[
group
];
const
int
f_cnt
=
group_feature_cnt_
[
group
];
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
if
(
is_feature_used
[
fidx
])
{
if
(
is_feature_used
[
fidx
])
{
is_groud_used
=
true
;
is_groud_used
=
true
;
break
;
break
;
}
}
if
(
!
is_groud_used
)
{
continue
;
}
// feature is not used
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
// construct histograms for smaller leaf
if
(
ordered_bins
[
group
]
==
nullptr
)
{
// if not use ordered bin
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
data_indices
,
num_data
,
ptr_ordered_grad
,
ptr_ordered_hess
,
data_ptr
);
}
else
{
// used ordered bin
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
gradients
,
hessians
,
data_ptr
);
}
}
OMP_LOOP_EX_END
();
}
}
if
(
!
is_groud_used
)
{
continue
;
}
OMP_THROW_EX
();
// feature is not used
}
else
{
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
OMP_INIT_EX
();
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
#pragma omp parallel for schedule(static)
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
// construct histograms for smaller leaf
OMP_LOOP_EX_BEGIN
();
if
(
ordered_bins
[
group
]
==
nullptr
)
{
bool
is_groud_used
=
false
;
// if not use ordered bin
const
int
f_cnt
=
group_feature_cnt_
[
group
];
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
data_indices
,
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
num_data
,
if
(
is_feature_used
[
fidx
])
{
ptr_ordered_grad
,
is_groud_used
=
true
;
ptr_ordered_hess
,
break
;
data_ptr
);
}
}
else
{
}
// used ordered bin
if
(
!
is_groud_used
)
{
continue
;
}
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
// feature is not used
gradients
,
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
hessians
,
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
data_ptr
);
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
// construct histograms for smaller leaf
if
(
ordered_bins
[
group
]
==
nullptr
)
{
// if not use ordered bin
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
data_indices
,
num_data
,
ptr_ordered_grad
,
data_ptr
);
}
else
{
// used ordered bin
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
gradients
,
data_ptr
);
}
// fixed hessian.
for
(
int
i
=
0
;
i
<
num_bin
;
++
i
)
{
data_ptr
[
i
].
sum_hessians
=
data_ptr
[
i
].
cnt
*
hessians
[
0
];
}
OMP_LOOP_EX_END
();
}
}
OMP_
LOOP_EX_END
();
OMP_
THROW_EX
();
}
}
OMP_THROW_EX
();
}
else
{
}
else
{
OMP_INIT_EX
();
if
(
!
is_constant_hessian
)
{
#pragma omp parallel for schedule(static)
OMP_INIT_EX
();
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
#pragma omp parallel for schedule(static)
OMP_LOOP_EX_BEGIN
();
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
bool
is_groud_used
=
false
;
OMP_LOOP_EX_BEGIN
();
const
int
f_cnt
=
group_feature_cnt_
[
group
];
bool
is_groud_used
=
false
;
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
const
int
f_cnt
=
group_feature_cnt_
[
group
];
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
if
(
is_feature_used
[
fidx
])
{
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
is_groud_used
=
true
;
if
(
is_feature_used
[
fidx
])
{
break
;
is_groud_used
=
true
;
break
;
}
}
}
if
(
!
is_groud_used
)
{
continue
;
}
// feature is not used
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
// construct histograms for smaller leaf
if
(
ordered_bins
[
group
]
==
nullptr
)
{
// if not use ordered bin
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
num_data
,
ptr_ordered_grad
,
ptr_ordered_hess
,
data_ptr
);
}
else
{
// used ordered bin
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
gradients
,
hessians
,
data_ptr
);
}
OMP_LOOP_EX_END
();
}
}
if
(
!
is_groud_used
)
{
continue
;
}
OMP_THROW_EX
();
// feature is not used
}
else
{
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
OMP_INIT_EX
();
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
#pragma omp parallel for schedule(static)
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
for
(
int
group
=
0
;
group
<
num_groups_
;
++
group
)
{
// construct histograms for smaller leaf
OMP_LOOP_EX_BEGIN
();
if
(
ordered_bins
[
group
]
==
nullptr
)
{
bool
is_groud_used
=
false
;
// if not use ordered bin
const
int
f_cnt
=
group_feature_cnt_
[
group
];
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
for
(
int
j
=
0
;
j
<
f_cnt
;
++
j
)
{
data_indices
,
const
int
fidx
=
group_feature_start_
[
group
]
+
j
;
num_data
,
if
(
is_feature_used
[
fidx
])
{
ptr_ordered_grad
,
is_groud_used
=
true
;
data_ptr
);
break
;
}
else
{
}
// used ordered bin
}
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
if
(
!
is_groud_used
)
{
continue
;
}
gradients
,
// feature is not used
data_ptr
);
auto
data_ptr
=
hist_data
+
group_bin_boundaries_
[
group
];
}
const
int
num_bin
=
feature_groups_
[
group
]
->
num_total_bin_
;
// fixed hessian.
std
::
memset
(
data_ptr
+
1
,
0
,
(
num_bin
-
1
)
*
sizeof
(
HistogramBinEntry
));
for
(
int
i
=
0
;
i
<
num_bin
;
++
i
)
{
// construct histograms for smaller leaf
data_ptr
[
i
].
sum_hessians
=
data_ptr
[
i
].
cnt
*
hessians
[
0
];
if
(
ordered_bins
[
group
]
==
nullptr
)
{
// if not use ordered bin
feature_groups_
[
group
]
->
bin_data_
->
ConstructHistogram
(
num_data
,
ptr_ordered_grad
,
data_ptr
);
}
else
{
// used ordered bin
ordered_bins
[
group
]
->
ConstructHistogram
(
leaf_idx
,
gradients
,
data_ptr
);
}
// fixed hessian.
for
(
int
i
=
0
;
i
<
num_bin
;
++
i
)
{
data_ptr
[
i
].
sum_hessians
=
data_ptr
[
i
].
cnt
*
hessians
[
0
];
}
OMP_LOOP_EX_END
();
}
}
OMP_
LOOP_EX_END
();
OMP_
THROW_EX
();
}
}
OMP_THROW_EX
();
}
}
}
}
...
...
src/io/dense_bin.hpp
View file @
66b7f032
...
@@ -66,122 +66,124 @@ public:
...
@@ -66,122 +66,124 @@ public:
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
HistogramBinEntry
*
out
)
const
override
{
HistogramBinEntry
*
out
)
const
override
{
// use 4-way unrolling, will be faster
const
data_size_t
rest
=
num_data
&
0x3
;
if
(
data_indices
!=
nullptr
)
{
// if use part of data
data_size_t
i
=
0
;
const
data_size_t
rest
=
num_data
&
0x3
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
data_size_t
i
=
0
;
const
VAL_T
bin0
=
data_
[
data_indices
[
i
]];
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
VAL_T
bin1
=
data_
[
data_indices
[
i
+
1
]];
const
VAL_T
bin0
=
data_
[
data_indices
[
i
]];
const
VAL_T
bin2
=
data_
[
data_indices
[
i
+
2
]];
const
VAL_T
bin1
=
data_
[
data_indices
[
i
+
1
]];
const
VAL_T
bin3
=
data_
[
data_indices
[
i
+
3
]];
const
VAL_T
bin2
=
data_
[
data_indices
[
i
+
2
]];
const
VAL_T
bin3
=
data_
[
data_indices
[
i
+
3
]];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
++
out
[
bin0
].
cnt
;
++
out
[
bin1
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin1
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin2
].
cnt
;
}
++
out
[
bin3
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
}
const
VAL_T
bin
=
data_
[
data_indices
[
i
]];
for
(;
i
<
num_data
;
++
i
)
{
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
const
VAL_T
bin
=
data_
[
data_indices
[
i
]];
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
++
out
[
bin
].
cnt
;
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
}
++
out
[
bin
].
cnt
;
}
}
}
else
{
// use full data
void
ConstructHistogram
(
data_size_t
num_data
,
const
data_size_t
rest
=
num_data
&
0x3
;
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
data_size_t
i
=
0
;
HistogramBinEntry
*
out
)
const
override
{
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
data_size_t
rest
=
num_data
&
0x3
;
const
VAL_T
bin0
=
data_
[
i
];
data_size_t
i
=
0
;
const
VAL_T
bin1
=
data_
[
i
+
1
];
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
VAL_T
bin2
=
data_
[
i
+
2
];
const
VAL_T
bin0
=
data_
[
i
];
const
VAL_T
bin3
=
data_
[
i
+
3
];
const
VAL_T
bin1
=
data_
[
i
+
1
];
const
VAL_T
bin2
=
data_
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
const
VAL_T
bin3
=
data_
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
++
out
[
bin0
].
cnt
;
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin1
].
cnt
;
}
++
out
[
bin2
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin3
].
cnt
;
const
VAL_T
bin
=
data_
[
i
];
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
for
(;
i
<
num_data
;
++
i
)
{
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
const
VAL_T
bin
=
data_
[
i
];
++
out
[
bin
].
cnt
;
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
}
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
++
out
[
bin
].
cnt
;
}
}
}
}
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_gradients
,
HistogramBinEntry
*
out
)
const
override
{
HistogramBinEntry
*
out
)
const
override
{
// use 4-way unrolling, will be faster
const
data_size_t
rest
=
num_data
&
0x3
;
if
(
data_indices
!=
nullptr
)
{
// if use part of data
data_size_t
i
=
0
;
const
data_size_t
rest
=
num_data
&
0x3
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
data_size_t
i
=
0
;
const
VAL_T
bin0
=
data_
[
data_indices
[
i
]];
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
VAL_T
bin1
=
data_
[
data_indices
[
i
+
1
]];
const
VAL_T
bin0
=
data_
[
data_indices
[
i
]];
const
VAL_T
bin2
=
data_
[
data_indices
[
i
+
2
]];
const
VAL_T
bin1
=
data_
[
data_indices
[
i
+
1
]];
const
VAL_T
bin3
=
data_
[
data_indices
[
i
+
3
]];
const
VAL_T
bin2
=
data_
[
data_indices
[
i
+
2
]];
const
VAL_T
bin3
=
data_
[
data_indices
[
i
+
3
]];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
++
out
[
bin0
].
cnt
;
++
out
[
bin1
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin1
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin2
].
cnt
;
}
++
out
[
bin3
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
}
const
VAL_T
bin
=
data_
[
data_indices
[
i
]];
for
(;
i
<
num_data
;
++
i
)
{
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
const
VAL_T
bin
=
data_
[
data_indices
[
i
]];
++
out
[
bin
].
cnt
;
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
}
++
out
[
bin
].
cnt
;
}
}
}
else
{
// use full data
void
ConstructHistogram
(
data_size_t
num_data
,
const
data_size_t
rest
=
num_data
&
0x3
;
const
score_t
*
ordered_gradients
,
data_size_t
i
=
0
;
HistogramBinEntry
*
out
)
const
override
{
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
data_size_t
rest
=
num_data
&
0x3
;
const
VAL_T
bin0
=
data_
[
i
];
data_size_t
i
=
0
;
const
VAL_T
bin1
=
data_
[
i
+
1
];
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
VAL_T
bin2
=
data_
[
i
+
2
];
const
VAL_T
bin0
=
data_
[
i
];
const
VAL_T
bin3
=
data_
[
i
+
3
];
const
VAL_T
bin1
=
data_
[
i
+
1
];
const
VAL_T
bin2
=
data_
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
const
VAL_T
bin3
=
data_
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
++
out
[
bin0
].
cnt
;
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin1
].
cnt
;
}
++
out
[
bin2
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin3
].
cnt
;
const
VAL_T
bin
=
data_
[
i
];
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin
].
cnt
;
const
VAL_T
bin
=
data_
[
i
];
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
++
out
[
bin
].
cnt
;
}
}
}
}
...
...
src/io/dense_nbits_bin.hpp
View file @
66b7f032
...
@@ -49,7 +49,7 @@ public:
...
@@ -49,7 +49,7 @@ public:
void
Push
(
int
,
data_size_t
idx
,
uint32_t
value
)
override
{
void
Push
(
int
,
data_size_t
idx
,
uint32_t
value
)
override
{
if
(
buf_
.
empty
())
{
if
(
buf_
.
empty
())
{
#pragma omp critical
#pragma omp critical
{
{
if
(
buf_
.
empty
())
{
if
(
buf_
.
empty
())
{
int
len
=
(
num_data_
+
1
)
/
2
;
int
len
=
(
num_data_
+
1
)
/
2
;
...
@@ -80,152 +80,149 @@ public:
...
@@ -80,152 +80,149 @@ public:
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
HistogramBinEntry
*
out
)
const
override
{
HistogramBinEntry
*
out
)
const
override
{
if
(
data_indices
!=
nullptr
)
{
// if use part of data
const
data_size_t
rest
=
num_data
&
0x3
;
const
data_size_t
rest
=
num_data
&
0x3
;
data_size_t
i
=
0
;
data_size_t
i
=
0
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
data_size_t
idx
=
data_indices
[
i
];
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin0
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin0
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
1
];
idx
=
data_indices
[
i
+
1
];
const
auto
bin1
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin1
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
2
];
idx
=
data_indices
[
i
+
2
];
const
auto
bin2
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin2
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
3
];
idx
=
data_indices
[
i
+
3
];
const
auto
bin3
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin3
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin0
].
sum_
gradient
s
+=
ordered_
gradient
s
[
i
];
out
[
bin0
].
sum_
hessian
s
+=
ordered_
hessian
s
[
i
];
out
[
bin1
].
sum_
gradient
s
+=
ordered_
gradient
s
[
i
+
1
];
out
[
bin1
].
sum_
hessian
s
+=
ordered_
hessian
s
[
i
+
1
];
out
[
bin2
].
sum_
gradient
s
+=
ordered_
gradient
s
[
i
+
2
];
out
[
bin2
].
sum_
hessian
s
+=
ordered_
hessian
s
[
i
+
2
];
out
[
bin3
].
sum_
gradient
s
+=
ordered_
gradient
s
[
i
+
3
];
out
[
bin3
].
sum_
hessian
s
+=
ordered_
hessian
s
[
i
+
3
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
]
;
++
out
[
bin0
].
cnt
;
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
]
;
++
out
[
bin1
].
cnt
;
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
]
;
++
out
[
bin2
].
cnt
;
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
]
;
++
out
[
bin3
].
cnt
;
++
out
[
bin0
].
cnt
;
}
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin3
].
cnt
;
}
for
(;
i
<
num_data
;
++
i
)
{
for
(;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
++
out
[
bin
].
cnt
;
++
out
[
bin
].
cnt
;
}
}
}
}
else
{
// use full data
void
ConstructHistogram
(
data_size_t
num_data
,
const
data_size_t
rest
=
num_data
&
0x3
;
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
,
data_size_t
i
=
0
;
HistogramBinEntry
*
out
)
const
override
{
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
data_size_t
rest
=
num_data
&
0x3
;
int
j
=
i
>>
1
;
data_size_t
i
=
0
;
const
auto
bin0
=
(
data_
[
j
])
&
0xf
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
auto
bin1
=
(
data_
[
j
]
>>
4
)
&
0xf
;
int
j
=
i
>>
1
;
++
j
;
const
auto
bin0
=
(
data_
[
j
])
&
0xf
;
const
auto
bin2
=
(
data_
[
j
])
&
0xf
;
const
auto
bin1
=
(
data_
[
j
]
>>
4
)
&
0xf
;
const
auto
bin3
=
(
data_
[
j
]
>>
4
)
&
0xf
;
++
j
;
const
auto
bin2
=
(
data_
[
j
])
&
0xf
;
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
const
auto
bin3
=
(
data_
[
j
]
>>
4
)
&
0xf
;
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
out
[
bin0
].
sum_hessians
+=
ordered_hessians
[
i
];
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
out
[
bin1
].
sum_hessians
+=
ordered_hessians
[
i
+
1
];
out
[
bin2
].
sum_hessians
+=
ordered_hessians
[
i
+
2
];
++
out
[
bin0
].
cnt
;
out
[
bin3
].
sum_hessians
+=
ordered_hessians
[
i
+
3
];
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin1
].
cnt
;
}
++
out
[
bin2
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin3
].
cnt
;
const
auto
bin
=
(
data_
[
i
>>
1
]
>>
((
i
&
1
)
<<
2
))
&
0xf
;
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
for
(;
i
<
num_data
;
++
i
)
{
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
const
auto
bin
=
(
data_
[
i
>>
1
]
>>
((
i
&
1
)
<<
2
))
&
0xf
;
++
out
[
bin
].
cnt
;
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
}
out
[
bin
].
sum_hessians
+=
ordered_hessians
[
i
];
++
out
[
bin
].
cnt
;
}
}
}
}
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
void
ConstructHistogram
(
const
data_size_t
*
data_indices
,
data_size_t
num_data
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_gradients
,
HistogramBinEntry
*
out
)
const
override
{
HistogramBinEntry
*
out
)
const
override
{
if
(
data_indices
!=
nullptr
)
{
// if use part of data
const
data_size_t
rest
=
num_data
&
0x3
;
data_size_t
i
=
0
;
const
data_size_t
rest
=
num_data
&
0x3
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
data_size_t
i
=
0
;
data_size_t
idx
=
data_indices
[
i
];
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
auto
bin0
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
data_size_t
idx
=
data_indices
[
i
];
idx
=
data_indices
[
i
+
1
];
const
auto
bin0
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin1
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
1
];
idx
=
data_indices
[
i
+
2
];
const
auto
bin1
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin2
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
2
];
idx
=
data_indices
[
i
+
3
];
const
auto
bin2
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin3
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
idx
=
data_indices
[
i
+
3
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
const
auto
bin3
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
++
out
[
bin0
].
cnt
;
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
++
out
[
bin1
].
cnt
;
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
++
out
[
bin2
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin0
].
cnt
;
}
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin3
].
cnt
;
}
for
(;
i
<
num_data
;
++
i
)
{
for
(;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
const
data_size_t
idx
=
data_indices
[
i
];
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
const
auto
bin
=
(
data_
[
idx
>>
1
]
>>
((
idx
&
1
)
<<
2
))
&
0xf
;
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
++
out
[
bin
].
cnt
;
++
out
[
bin
].
cnt
;
}
}
}
}
else
{
// use full data
void
ConstructHistogram
(
data_size_t
num_data
,
const
data_size_t
rest
=
num_data
&
0x3
;
const
score_t
*
ordered_gradients
,
data_size_t
i
=
0
;
HistogramBinEntry
*
out
)
const
override
{
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
data_size_t
rest
=
num_data
&
0x3
;
int
j
=
i
>>
1
;
data_size_t
i
=
0
;
const
auto
bin0
=
(
data_
[
j
])
&
0xf
;
for
(;
i
<
num_data
-
rest
;
i
+=
4
)
{
const
auto
bin1
=
(
data_
[
j
]
>>
4
)
&
0xf
;
int
j
=
i
>>
1
;
++
j
;
const
auto
bin0
=
(
data_
[
j
])
&
0xf
;
const
auto
bin2
=
(
data_
[
j
])
&
0xf
;
const
auto
bin1
=
(
data_
[
j
]
>>
4
)
&
0xf
;
const
auto
bin3
=
(
data_
[
j
]
>>
4
)
&
0xf
;
++
j
;
const
auto
bin2
=
(
data_
[
j
])
&
0xf
;
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
const
auto
bin3
=
(
data_
[
j
]
>>
4
)
&
0xf
;
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
out
[
bin0
].
sum_gradients
+=
ordered_gradients
[
i
];
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
out
[
bin1
].
sum_gradients
+=
ordered_gradients
[
i
+
1
];
out
[
bin2
].
sum_gradients
+=
ordered_gradients
[
i
+
2
];
++
out
[
bin0
].
cnt
;
out
[
bin3
].
sum_gradients
+=
ordered_gradients
[
i
+
3
];
++
out
[
bin1
].
cnt
;
++
out
[
bin2
].
cnt
;
++
out
[
bin0
].
cnt
;
++
out
[
bin3
].
cnt
;
++
out
[
bin1
].
cnt
;
}
++
out
[
bin2
].
cnt
;
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin3
].
cnt
;
const
auto
bin
=
(
data_
[
i
>>
1
]
>>
((
i
&
1
)
<<
2
))
&
0xf
;
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
for
(;
i
<
num_data
;
++
i
)
{
++
out
[
bin
].
cnt
;
const
auto
bin
=
(
data_
[
i
>>
1
]
>>
((
i
&
1
)
<<
2
))
&
0xf
;
}
out
[
bin
].
sum_gradients
+=
ordered_gradients
[
i
];
++
out
[
bin
].
cnt
;
}
}
}
}
...
...
src/io/sparse_bin.hpp
View file @
66b7f032
...
@@ -104,12 +104,24 @@ public:
...
@@ -104,12 +104,24 @@ public:
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
}
}
void
ConstructHistogram
(
data_size_t
,
const
score_t
*
,
const
score_t
*
,
HistogramBinEntry
*
)
const
override
{
// Will use OrderedSparseBin->ConstructHistogram() instead
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
}
void
ConstructHistogram
(
const
data_size_t
*
,
data_size_t
,
const
score_t
*
,
void
ConstructHistogram
(
const
data_size_t
*
,
data_size_t
,
const
score_t
*
,
HistogramBinEntry
*
)
const
override
{
HistogramBinEntry
*
)
const
override
{
// Will use OrderedSparseBin->ConstructHistogram() instead
// Will use OrderedSparseBin->ConstructHistogram() instead
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
}
}
void
ConstructHistogram
(
data_size_t
,
const
score_t
*
,
HistogramBinEntry
*
)
const
override
{
// Will use OrderedSparseBin->ConstructHistogram() instead
Log
::
Fatal
(
"Using OrderedSparseBin->ConstructHistogram() instead"
);
}
inline
bool
NextNonzero
(
data_size_t
*
i_delta
,
inline
bool
NextNonzero
(
data_size_t
*
i_delta
,
data_size_t
*
cur_pos
)
const
{
data_size_t
*
cur_pos
)
const
{
++
(
*
i_delta
);
++
(
*
i_delta
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment