Commit 3ef3a489 authored by Guolin Ke's avatar Guolin Ke
Browse files

change init_score to double type

parent 12a96334
...@@ -204,6 +204,8 @@ SEXP LGBM_DatasetSetField_R(SEXP handle, ...@@ -204,6 +204,8 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
vec[i] = static_cast<int32_t>(R_INT_PTR(field_data)[i]); vec[i] = static_cast<int32_t>(R_INT_PTR(field_data)[i]);
} }
CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, vec.data(), len, C_API_DTYPE_INT32)); CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, vec.data(), len, C_API_DTYPE_INT32));
} else if(!strcmp("init_score", name)) {
CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, R_REAL_PTR(field_data), len, C_API_DTYPE_FLOAT64));
} else { } else {
std::vector<float> vec(len); std::vector<float> vec(len);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -234,6 +236,12 @@ SEXP LGBM_DatasetGetField_R(SEXP handle, ...@@ -234,6 +236,12 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
for (int i = 0; i < out_len - 1; ++i) { for (int i = 0; i < out_len - 1; ++i) {
R_INT_PTR(field_data)[i] = p_data[i + 1] - p_data[i]; R_INT_PTR(field_data)[i] = p_data[i + 1] - p_data[i];
} }
} else if (!strcmp("init_score", name)) {
auto p_data = reinterpret_cast<const double*>(res);
#pragma omp parallel for schedule(static)
for (int i = 0; i < out_len; ++i) {
R_REAL_PTR(field_data)[i] = p_data[i];
}
} else { } else {
auto p_data = reinterpret_cast<const float*>(res); auto p_data = reinterpret_cast<const float*>(res);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
......
...@@ -94,8 +94,6 @@ public: ...@@ -94,8 +94,6 @@ public:
* \brief Set initial scores * \brief Set initial scores
* \param init_score Initial scores, this class will manage memory for init_score. * \param init_score Initial scores, this class will manage memory for init_score.
*/ */
void SetInitScore(const float* init_score, data_size_t len);
void SetInitScore(const double* init_score, data_size_t len); void SetInitScore(const double* init_score, data_size_t len);
...@@ -195,7 +193,7 @@ public: ...@@ -195,7 +193,7 @@ public:
* \brief Get initial scores, if not exists, will return nullptr * \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores * \return Pointer of initial scores
*/ */
inline const float* init_score() const { inline const double* init_score() const {
if (!init_score_.empty()) { if (!init_score_.empty()) {
return init_score_.data(); return init_score_.data();
} else { } else {
...@@ -206,7 +204,7 @@ public: ...@@ -206,7 +204,7 @@ public:
/*! /*!
* \brief Get size of initial scores * \brief Get size of initial scores
*/ */
inline data_size_t num_init_score() const { return num_init_score_; } inline int64_t num_init_score() const { return num_init_score_; }
/*! \brief Disable copy */ /*! \brief Disable copy */
Metadata& operator=(const Metadata&) = delete; Metadata& operator=(const Metadata&) = delete;
...@@ -239,9 +237,9 @@ private: ...@@ -239,9 +237,9 @@ private:
/*! \brief Number of querys */ /*! \brief Number of querys */
data_size_t num_queries_; data_size_t num_queries_;
/*! \brief Number of Initial score, used to check correct weight file */ /*! \brief Number of Initial score, used to check correct weight file */
data_size_t num_init_score_; int64_t num_init_score_;
/*! \brief Initial score */ /*! \brief Initial score */
std::vector<float> init_score_; std::vector<double> init_score_;
/*! \brief Queries data */ /*! \brief Queries data */
std::vector<data_size_t> queries_; std::vector<data_size_t> queries_;
/*! \brief mutex for threading safe call */ /*! \brief mutex for threading safe call */
...@@ -336,10 +334,14 @@ public: ...@@ -336,10 +334,14 @@ public:
bool SetFloatField(const char* field_name, const float* field_data, data_size_t num_element); bool SetFloatField(const char* field_name, const float* field_data, data_size_t num_element);
bool SetDoubleField(const char* field_name, const double* field_data, data_size_t num_element);
bool SetIntField(const char* field_name, const int* field_data, data_size_t num_element); bool SetIntField(const char* field_name, const int* field_data, data_size_t num_element);
bool GetFloatField(const char* field_name, data_size_t* out_len, const float** out_ptr); bool GetFloatField(const char* field_name, data_size_t* out_len, const float** out_ptr);
bool GetDoubleField(const char* field_name, data_size_t* out_len, const double** out_ptr);
bool GetIntField(const char* field_name, data_size_t* out_len, const int** out_ptr); bool GetIntField(const char* field_name, data_size_t* out_len, const int** out_ptr);
/*! /*!
......
...@@ -90,6 +90,14 @@ def cfloat32_array_to_numpy(cptr, length): ...@@ -90,6 +90,14 @@ def cfloat32_array_to_numpy(cptr, length):
else: else:
raise RuntimeError('Expected float pointer') raise RuntimeError('Expected float pointer')
def cfloat64_array_to_numpy(cptr, length):
    """Convert a ctypes double pointer array to a numpy array.

    Parameters
    ----------
    cptr : ctypes.POINTER(ctypes.c_double)
        Pointer to the first element of the C double buffer.
    length : int
        Number of elements to copy out of the buffer.

    Returns
    -------
    numpy.ndarray
        A float64 array owning its own copy of the data.

    Raises
    ------
    RuntimeError
        If ``cptr`` is not a pointer to ``c_double``.
    """
    if isinstance(cptr, ctypes.POINTER(ctypes.c_double)):
        # View the C buffer without copying, then copy once in bulk;
        # this avoids the per-element Python iteration of np.fromiter.
        return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
    else:
        raise RuntimeError('Expected double pointer')
def cint32_array_to_numpy(cptr, length): def cint32_array_to_numpy(cptr, length):
"""Convert a ctypes float pointer array to a numpy array. """Convert a ctypes float pointer array to a numpy array.
...@@ -162,7 +170,7 @@ C_API_PREDICT_LEAF_INDEX = 2 ...@@ -162,7 +170,7 @@ C_API_PREDICT_LEAF_INDEX = 2
"""data type of data field""" """data type of data field"""
FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32, FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
"weight": C_API_DTYPE_FLOAT32, "weight": C_API_DTYPE_FLOAT32,
"init_score": C_API_DTYPE_FLOAT32, "init_score": C_API_DTYPE_FLOAT64,
"group": C_API_DTYPE_INT32} "group": C_API_DTYPE_INT32}
...@@ -616,7 +624,6 @@ class Dataset(object): ...@@ -616,7 +624,6 @@ class Dataset(object):
for j in range_(self.predictor.num_class): for j in range_(self.predictor.num_class):
new_init_score[j * num_data + i] = init_score[i * self.predictor.num_class + j] new_init_score[j * num_data + i] = init_score[i * self.predictor.num_class + j]
init_score = new_init_score init_score = new_init_score
init_score = init_score.astype(dtype=np.float32, copy=False)
self.set_init_score(init_score) self.set_init_score(init_score)
elif self.predictor is not None: elif self.predictor is not None:
raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__)) raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__))
...@@ -813,16 +820,23 @@ class Dataset(object): ...@@ -813,16 +820,23 @@ class Dataset(object):
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(FIELD_TYPE_MAPPER[field_name]))) ctypes.c_int(FIELD_TYPE_MAPPER[field_name])))
return return
dtype = np.int32 if field_name == 'group' else np.float32 dtype = np.float32
if field_name == 'group':
dtype = np.int32
elif field_name == 'init_score':
dtype = np.float64
data = list_to_1d_numpy(data, dtype, name=field_name) data = list_to_1d_numpy(data, dtype, name=field_name)
if data.dtype == np.float32: if data.dtype == np.float32:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
type_data = C_API_DTYPE_FLOAT32 type_data = C_API_DTYPE_FLOAT32
elif data.dtype == np.float64:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
type_data = C_API_DTYPE_FLOAT64
elif data.dtype == np.int32: elif data.dtype == np.int32:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int32))
type_data = C_API_DTYPE_INT32 type_data = C_API_DTYPE_INT32
else: else:
raise TypeError("Excepted np.float32 or np.int32, meet type({})".format(data.dtype)) raise TypeError("Excepted np.float32/64 or np.int32, meet type({})".format(data.dtype))
if type_data != FIELD_TYPE_MAPPER[field_name]: if type_data != FIELD_TYPE_MAPPER[field_name]:
raise TypeError("Input type error for set_field") raise TypeError("Input type error for set_field")
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
...@@ -864,6 +878,8 @@ class Dataset(object): ...@@ -864,6 +878,8 @@ class Dataset(object):
return cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value) return cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT32: elif out_type.value == C_API_DTYPE_FLOAT32:
return cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value) return cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT64:
return cfloat64_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_double)), tmp_out_len.value)
else: else:
raise TypeError("Unknown type") raise TypeError("Unknown type")
...@@ -976,7 +992,7 @@ class Dataset(object): ...@@ -976,7 +992,7 @@ class Dataset(object):
""" """
self.init_score = init_score self.init_score = init_score
if self.handle is not None and init_score is not None: if self.handle is not None and init_score is not None:
init_score = list_to_1d_numpy(init_score, name='init_score') init_score = list_to_1d_numpy(init_score, np.float64, name='init_score')
self.set_field('init_score', init_score) self.set_field('init_score', init_score)
def set_group(self, group): def set_group(self, group):
......
...@@ -20,18 +20,22 @@ public: ...@@ -20,18 +20,22 @@ public:
*/ */
ScoreUpdater(const Dataset* data, int num_class) : data_(data) { ScoreUpdater(const Dataset* data, int num_class) : data_(data) {
num_data_ = data->num_data(); num_data_ = data->num_data();
size_t total_size = static_cast<size_t>(num_data_) * num_class; int64_t total_size = static_cast<int64_t>(num_data_) * num_class;
score_.resize(total_size); score_.resize(total_size);
// default start score is zero // default start score is zero
std::fill(score_.begin(), score_.end(), 0.0f); #pragma omp parallel for schedule(static)
const float* init_score = data->metadata().init_score(); for (int64_t i = 0; i < total_size; ++i) {
score_[i] = 0.0f;
}
const double* init_score = data->metadata().init_score();
// if exists initial score, will start from it // if exists initial score, will start from it
if (init_score != nullptr) { if (init_score != nullptr) {
if ((data->metadata().num_init_score() % num_data_) != 0 if ((data->metadata().num_init_score() % num_data_) != 0
|| (data->metadata().num_init_score() / num_data_) != num_class) { || (data->metadata().num_init_score() / num_data_) != num_class) {
Log::Fatal("number of class for initial score error"); Log::Fatal("number of class for initial score error");
} }
for (size_t i = 0; i < total_size; ++i) { #pragma omp parallel for schedule(static)
for (int64_t i = 0; i < total_size; ++i) {
score_[i] = init_score[i]; score_[i] = init_score[i];
} }
} }
......
...@@ -536,6 +536,8 @@ DllExport int LGBM_DatasetSetField(DatasetHandle handle, ...@@ -536,6 +536,8 @@ DllExport int LGBM_DatasetSetField(DatasetHandle handle,
is_success = dataset->SetFloatField(field_name, reinterpret_cast<const float*>(field_data), static_cast<int32_t>(num_element)); is_success = dataset->SetFloatField(field_name, reinterpret_cast<const float*>(field_data), static_cast<int32_t>(num_element));
} else if (type == C_API_DTYPE_INT32) { } else if (type == C_API_DTYPE_INT32) {
is_success = dataset->SetIntField(field_name, reinterpret_cast<const int*>(field_data), static_cast<int32_t>(num_element)); is_success = dataset->SetIntField(field_name, reinterpret_cast<const int*>(field_data), static_cast<int32_t>(num_element));
} else if (type == C_API_DTYPE_FLOAT64) {
is_success = dataset->SetDoubleField(field_name, reinterpret_cast<const double*>(field_data), static_cast<int32_t>(num_element));
} }
if (!is_success) { throw std::runtime_error("Input data type erorr or field not found"); } if (!is_success) { throw std::runtime_error("Input data type erorr or field not found"); }
API_END(); API_END();
...@@ -555,6 +557,9 @@ DllExport int LGBM_DatasetGetField(DatasetHandle handle, ...@@ -555,6 +557,9 @@ DllExport int LGBM_DatasetGetField(DatasetHandle handle,
} else if (dataset->GetIntField(field_name, out_len, reinterpret_cast<const int**>(out_ptr))) { } else if (dataset->GetIntField(field_name, out_len, reinterpret_cast<const int**>(out_ptr))) {
*out_type = C_API_DTYPE_INT32; *out_type = C_API_DTYPE_INT32;
is_success = true; is_success = true;
} else if (dataset->GetDoubleField(field_name, out_len, reinterpret_cast<const double**>(out_ptr))) {
*out_type = C_API_DTYPE_FLOAT64;
is_success = true;
} }
if (!is_success) { throw std::runtime_error("Field not found"); } if (!is_success) { throw std::runtime_error("Field not found"); }
if (*out_ptr == nullptr) { *out_len = 0; } if (*out_ptr == nullptr) { *out_len = 0; }
......
...@@ -77,7 +77,16 @@ bool Dataset::SetFloatField(const char* field_name, const float* field_data, dat ...@@ -77,7 +77,16 @@ bool Dataset::SetFloatField(const char* field_name, const float* field_data, dat
metadata_.SetLabel(field_data, num_element); metadata_.SetLabel(field_data, num_element);
} else if (name == std::string("weight") || name == std::string("weights")) { } else if (name == std::string("weight") || name == std::string("weights")) {
metadata_.SetWeights(field_data, num_element); metadata_.SetWeights(field_data, num_element);
} else if (name == std::string("init_score")) { } else {
return false;
}
return true;
}
bool Dataset::SetDoubleField(const char* field_name, const double* field_data, data_size_t num_element) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("init_score")) {
metadata_.SetInitScore(field_data, num_element); metadata_.SetInitScore(field_data, num_element);
} else { } else {
return false; return false;
...@@ -107,9 +116,18 @@ bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len, const ...@@ -107,9 +116,18 @@ bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len, const
} else if (name == std::string("weight") || name == std::string("weights")) { } else if (name == std::string("weight") || name == std::string("weights")) {
*out_ptr = metadata_.weights(); *out_ptr = metadata_.weights();
*out_len = num_data_; *out_len = num_data_;
} else if (name == std::string("init_score")) { } else {
return false;
}
return true;
}
bool Dataset::GetDoubleField(const char* field_name, data_size_t* out_len, const double** out_ptr) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("init_score")) {
*out_ptr = metadata_.init_score(); *out_ptr = metadata_.init_score();
*out_len = num_data_; *out_len = static_cast<data_size_t>(metadata_.num_init_score());
} else { } else {
return false; return false;
} }
......
...@@ -20,7 +20,10 @@ public: ...@@ -20,7 +20,10 @@ public:
: num_data_(num_data) { : num_data_(num_data) {
data_.resize(num_data_); data_.resize(num_data_);
VAL_T default_bin_T = static_cast<VAL_T>(default_bin); VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
std::fill(data_.begin(), data_.end(), default_bin_T); #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
data_[i] = default_bin_T;
}
} }
~DenseBin() { ~DenseBin() {
......
...@@ -36,7 +36,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) { ...@@ -36,7 +36,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
} }
weights_ = std::vector<float>(num_data_); weights_ = std::vector<float>(num_data_);
num_weights_ = num_data_; num_weights_ = num_data_;
std::fill(weights_.begin(), weights_.end(), 0.0f); #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_weights_; ++i) {
weights_[i] = 0.0f;
}
} }
if (query_idx >= 0) { if (query_idx >= 0) {
if (!query_boundaries_.empty()) { if (!query_boundaries_.empty()) {
...@@ -45,7 +48,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) { ...@@ -45,7 +48,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
} }
if (!query_weights_.empty()) { query_weights_.clear(); } if (!query_weights_.empty()) { query_weights_.clear(); }
queries_ = std::vector<data_size_t>(num_data_); queries_ = std::vector<data_size_t>(num_data_);
std::fill(queries_.begin(), queries_.end(), 0); #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
queries_[i] = 0;
}
} }
} }
...@@ -53,6 +59,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da ...@@ -53,6 +59,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
num_data_ = num_used_indices; num_data_ = num_used_indices;
label_ = std::vector<float>(num_used_indices); label_ = std::vector<float>(num_used_indices);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_used_indices; i++) { for (data_size_t i = 0; i < num_used_indices; i++) {
label_[i] = fullset.label_[used_indices[i]]; label_[i] = fullset.label_[used_indices[i]];
} }
...@@ -60,6 +67,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da ...@@ -60,6 +67,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
if (!fullset.weights_.empty()) { if (!fullset.weights_.empty()) {
weights_ = std::vector<float>(num_used_indices); weights_ = std::vector<float>(num_used_indices);
num_weights_ = num_used_indices; num_weights_ = num_used_indices;
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_used_indices; i++) { for (data_size_t i = 0; i < num_used_indices; i++) {
weights_[i] = fullset.weights_[used_indices[i]]; weights_[i] = fullset.weights_[used_indices[i]];
} }
...@@ -68,9 +76,10 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da ...@@ -68,9 +76,10 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
} }
if (!fullset.init_score_.empty()) { if (!fullset.init_score_.empty()) {
int num_class = static_cast<int>(fullset.num_init_score_) / fullset.num_data_; int num_class = static_cast<int>(fullset.num_init_score_ / fullset.num_data_);
init_score_ = std::vector<float>(num_used_indices*num_class); init_score_ = std::vector<double>(num_used_indices*num_class);
num_init_score_ = num_used_indices*num_class; num_init_score_ = static_cast<int64_t>(num_used_indices) * num_class;
#pragma omp parallel for schedule(static)
for (int k = 0; k < num_class; ++k) { for (int k = 0; k < num_class; ++k) {
for (data_size_t i = 0; i < num_used_indices; i++) { for (data_size_t i = 0; i < num_used_indices; i++) {
init_score_[k*num_data_ + i] = fullset.init_score_[k* fullset.num_data_ + used_indices[i]]; init_score_[k*num_data_ + i] = fullset.init_score_[k* fullset.num_data_ + used_indices[i]];
...@@ -121,6 +130,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) { ...@@ -121,6 +130,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
auto old_label = label_; auto old_label = label_;
num_data_ = static_cast<data_size_t>(used_indices.size()); num_data_ = static_cast<data_size_t>(used_indices.size());
label_ = std::vector<float>(num_data_); label_ = std::vector<float>(num_data_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = old_label[used_indices[i]]; label_[i] = old_label[used_indices[i]];
} }
...@@ -201,7 +211,8 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data ...@@ -201,7 +211,8 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
auto old_weights = weights_; auto old_weights = weights_;
num_weights_ = num_data_; num_weights_ = num_data_;
weights_ = std::vector<float>(num_data_); weights_ = std::vector<float>(num_data_);
for (size_t i = 0; i < used_data_indices.size(); ++i) { #pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(used_data_indices.size()); ++i) {
weights_[i] = old_weights[used_data_indices[i]]; weights_[i] = old_weights[used_data_indices[i]];
} }
old_weights.clear(); old_weights.clear();
...@@ -243,9 +254,10 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data ...@@ -243,9 +254,10 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// get local initial scores // get local initial scores
if (!init_score_.empty()) { if (!init_score_.empty()) {
auto old_scores = init_score_; auto old_scores = init_score_;
int num_class = num_init_score_ / num_all_data; int num_class = static_cast<int>(num_init_score_ / num_all_data);
num_init_score_ = num_data_ * num_class; num_init_score_ = static_cast<int64_t>(num_data_) * num_class;
init_score_ = std::vector<float>(num_init_score_); init_score_ = std::vector<double>(num_init_score_);
#pragma omp parallel for schedule(static)
for (int k = 0; k < num_class; ++k){ for (int k = 0; k < num_class; ++k){
for (size_t i = 0; i < used_data_indices.size(); ++i) { for (size_t i = 0; i < used_data_indices.size(); ++i) {
init_score_[k * num_data_ + i] = old_scores[k * num_all_data + used_data_indices[i]]; init_score_[k * num_data_ + i] = old_scores[k * num_all_data + used_data_indices[i]];
...@@ -259,26 +271,6 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data ...@@ -259,26 +271,6 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
} }
} }
void Metadata::SetInitScore(const float* init_score, data_size_t len) {
  // Replace the stored initial scores under the metadata mutex.
  std::lock_guard<std::mutex> lock(mutex_);
  // A null pointer or zero length clears any previously stored scores.
  if (init_score == nullptr || len == 0) {
    init_score_.clear();
    num_init_score_ = 0;
    return;
  }
  // The score count must be a whole multiple of the row count
  // (one column per class).
  if ((len % num_data_) != 0) {
    Log::Fatal("Initial score size doesn't match data size");
  }
  num_init_score_ = len;
  // Bulk-copy the caller's buffer; assign() drops any old contents.
  init_score_.assign(init_score, init_score + len);
}
void Metadata::SetInitScore(const double* init_score, data_size_t len) { void Metadata::SetInitScore(const double* init_score, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
// save to nullptr // save to nullptr
...@@ -292,9 +284,10 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) { ...@@ -292,9 +284,10 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
} }
if (!init_score_.empty()) { init_score_.clear(); } if (!init_score_.empty()) { init_score_.clear(); }
num_init_score_ = len; num_init_score_ = len;
init_score_ = std::vector<float>(len); init_score_ = std::vector<double>(len);
for (data_size_t i = 0; i < len; ++i) { #pragma omp parallel for schedule(static)
init_score_[i] = static_cast<float>(init_score[i]); for (int64_t i = 0; i < num_init_score_; ++i) {
init_score_[i] = init_score[i];
} }
} }
...@@ -308,6 +301,7 @@ void Metadata::SetLabel(const float* label, data_size_t len) { ...@@ -308,6 +301,7 @@ void Metadata::SetLabel(const float* label, data_size_t len) {
} }
if (!label_.empty()) { label_.clear(); } if (!label_.empty()) { label_.clear(); }
label_ = std::vector<float>(num_data_); label_ = std::vector<float>(num_data_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = label[i]; label_[i] = label[i];
} }
...@@ -327,6 +321,7 @@ void Metadata::SetWeights(const float* weights, data_size_t len) { ...@@ -327,6 +321,7 @@ void Metadata::SetWeights(const float* weights, data_size_t len) {
if (!weights_.empty()) { weights_.clear(); } if (!weights_.empty()) { weights_.clear(); }
num_weights_ = num_data_; num_weights_ = num_data_;
weights_ = std::vector<float>(num_weights_); weights_ = std::vector<float>(num_weights_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_weights_; ++i) { for (data_size_t i = 0; i < num_weights_; ++i) {
weights_[i] = weights[i]; weights_[i] = weights[i];
} }
...@@ -342,6 +337,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) { ...@@ -342,6 +337,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
return; return;
} }
data_size_t sum = 0; data_size_t sum = 0;
#pragma omp parallel for schedule(static) reduction(+:sum)
for (data_size_t i = 0; i < len; ++i) { for (data_size_t i = 0; i < len; ++i) {
sum += query[i]; sum += query[i];
} }
...@@ -413,6 +409,7 @@ void Metadata::LoadWeights() { ...@@ -413,6 +409,7 @@ void Metadata::LoadWeights() {
Log::Info("Loading weights..."); Log::Info("Loading weights...");
num_weights_ = static_cast<data_size_t>(reader.Lines().size()); num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = std::vector<float>(num_weights_); weights_ = std::vector<float>(num_weights_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_weights_; ++i) { for (data_size_t i = 0; i < num_weights_; ++i) {
double tmp_weight = 0.0f; double tmp_weight = 0.0f;
Common::Atof(reader.Lines()[i].c_str(), &tmp_weight); Common::Atof(reader.Lines()[i].c_str(), &tmp_weight);
...@@ -435,26 +432,28 @@ void Metadata::LoadInitialScore() { ...@@ -435,26 +432,28 @@ void Metadata::LoadInitialScore() {
// use first line to count number class // use first line to count number class
int num_class = static_cast<int>(Common::Split(reader.Lines()[0].c_str(), '\t').size()); int num_class = static_cast<int>(Common::Split(reader.Lines()[0].c_str(), '\t').size());
data_size_t num_line = static_cast<data_size_t>(reader.Lines().size()); data_size_t num_line = static_cast<data_size_t>(reader.Lines().size());
num_init_score_ = static_cast<data_size_t>(num_line * num_class); num_init_score_ = static_cast<int64_t>(num_line) * num_class;
init_score_ = std::vector<float>(num_init_score_);
double tmp = 0.0f;
init_score_ = std::vector<double>(num_init_score_);
if (num_class == 1) { if (num_class == 1) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_line; ++i) { for (data_size_t i = 0; i < num_line; ++i) {
double tmp = 0.0f;
Common::Atof(reader.Lines()[i].c_str(), &tmp); Common::Atof(reader.Lines()[i].c_str(), &tmp);
init_score_[i] = static_cast<float>(tmp); init_score_[i] = static_cast<double>(tmp);
} }
} else { } else {
std::vector<std::string> oneline_init_score; std::vector<std::string> oneline_init_score;
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_line; ++i) { for (data_size_t i = 0; i < num_line; ++i) {
double tmp = 0.0f;
oneline_init_score = Common::Split(reader.Lines()[i].c_str(), '\t'); oneline_init_score = Common::Split(reader.Lines()[i].c_str(), '\t');
if (static_cast<int>(oneline_init_score.size()) != num_class) { if (static_cast<int>(oneline_init_score.size()) != num_class) {
Log::Fatal("Invalid initial score file. Redundant or insufficient columns."); Log::Fatal("Invalid initial score file. Redundant or insufficient columns.");
} }
for (int k = 0; k < num_class; ++k) { for (int k = 0; k < num_class; ++k) {
Common::Atof(oneline_init_score[k].c_str(), &tmp); Common::Atof(oneline_init_score[k].c_str(), &tmp);
init_score_[k * num_line + i] = static_cast<float>(tmp); init_score_[k * num_line + i] = static_cast<double>(tmp);
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment