"example/14_gemm_quantization" did not exist on "3eee1b9b8fa13d044509089c7fc8186f4439d412"
Commit 3b50aeac authored by Guolin Ke

merge from master

parents 504d400c c96ae6af
......@@ -56,7 +56,7 @@ public:
/*! \brief True if bin is trivial (contains only one bin) */
inline bool is_trival() const { return is_trival_; }
/*! \brief Sparsity of this bin (num_zero_bins / num_data) */
inline float sparse_rate() const { return sparse_rate_; }
inline double sparse_rate() const { return sparse_rate_; }
/*!
* \brief Save binary data to file
* \param file File to write to
......@@ -67,7 +67,7 @@ public:
* \param bin
* \return Feature value of this bin
*/
inline float BinToValue(unsigned int bin) const {
inline double BinToValue(unsigned int bin) const {
return bin_upper_bound_[bin];
}
/*!
......@@ -79,14 +79,14 @@ public:
* \param value
* \return bin for this feature value
*/
inline unsigned int ValueToBin(float value) const;
inline unsigned int ValueToBin(double value) const;
/*!
* \brief Construct feature value to bin mapper according to feature values
* \param values (Sampled) values of this feature
* \param max_bin The maximal number of bin
*/
void FindBin(std::vector<float>* values, int max_bin);
void FindBin(std::vector<double>* values, int max_bin);
/*!
* \brief Use specific number of bin to calculate the size of this class
......@@ -111,11 +111,11 @@ private:
/*! \brief Number of bins */
int num_bin_;
/*! \brief Store upper bound for each bin */
float* bin_upper_bound_;
double* bin_upper_bound_;
/*! \brief True if this feature is trivial */
bool is_trival_;
/*! \brief Sparse rate of this feature's bins (num_bin0 / num_data) */
float sparse_rate_;
double sparse_rate_;
};
/*!
......@@ -271,7 +271,7 @@ public:
* \return The bin data object
*/
static Bin* CreateBin(data_size_t num_data, int num_bin,
float sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
/*!
* \brief Create object for bin data of one feature, used for dense feature
......@@ -293,7 +293,7 @@ public:
int num_bin, int default_bin);
};
inline unsigned int BinMapper::ValueToBin(float value) const {
inline unsigned int BinMapper::ValueToBin(double value) const {
// binary search to find bin
int l = 0;
int r = num_bin_ - 1;
......
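Note: the body of ValueToBin is truncated in this hunk; it binary-searches bin_upper_bound_ for the first bin whose upper bound is at least the value. A minimal self-contained sketch of that search (illustrative names, not this commit's exact code), assuming the bounds are sorted ascending and end with +infinity:

#include <vector>

// Sketch of the search ValueToBin performs; assumes bin_upper_bound is
// sorted ascending and its last entry is +infinity, so every value maps
// to some bin.
inline unsigned int ValueToBinSketch(const std::vector<double>& bin_upper_bound,
                                     double value) {
  int l = 0;
  int r = static_cast<int>(bin_upper_bound.size()) - 1;
  while (l < r) {
    int m = (l + r) / 2;
    if (value <= bin_upper_bound[m]) {
      r = m;       // value fits in bin m or an earlier bin
    } else {
      l = m + 1;   // value lies strictly above bin m
    }
  }
  return static_cast<unsigned int>(l);
}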
......@@ -58,7 +58,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
virtual float PredictRaw(const float* feature_values,
virtual double PredictRaw(const double* feature_values,
int num_used_model) const = 0;
/*!
......@@ -67,7 +67,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
virtual float Predict(const float* feature_values,
virtual double Predict(const double* feature_values,
int num_used_model) const = 0;
/*!
......@@ -77,7 +77,7 @@ public:
* \return Predicted leaf index for this record
*/
virtual std::vector<int> PredictLeafIndex(
const float* feature_values,
const double* feature_values,
int num_used_model) const = 0;
/*!
......@@ -85,7 +85,7 @@ public:
* \param feature_values Feature value on this record
* \return Prediction result, num_class numbers per line
*/
virtual std::vector<float> PredictMulticlass(const float* value, int num_used_model) const = 0;
virtual std::vector<double> PredictMulticlass(const double* value, int num_used_model) const = 0;
/*!
* \brief save model to file
......
......@@ -49,15 +49,15 @@ public:
const std::string& name, int* out);
/*!
* \brief Get float value by specific name of key
* \brief Get double value by specific name of key
* \param params Store the key and value for params
* \param name Name of key
* \param out Value is assigned to out if the key exists
* \return True if key exists
*/
inline bool GetFloat(
inline bool GetDouble(
const std::unordered_map<std::string, std::string>& params,
const std::string& name, float* out);
const std::string& name, double* out);
/*!
* \brief Get bool value by specific name of key
......@@ -123,9 +123,9 @@ public:
struct ObjectiveConfig: public ConfigBase {
public:
virtual ~ObjectiveConfig() {}
float sigmoid = 1.0f;
double sigmoid = 1.0f;
// for lambdarank
std::vector<float> label_gain;
std::vector<double> label_gain;
// for lambdarank
int max_position = 20;
// for binary
......@@ -140,8 +140,8 @@ struct MetricConfig: public ConfigBase {
public:
virtual ~MetricConfig() {}
int num_class = 1;
float sigmoid = 1.0f;
std::vector<float> label_gain;
double sigmoid = 1.0f;
std::vector<double> label_gain;
std::vector<int> eval_at;
void Set(const std::unordered_map<std::string, std::string>& params) override;
};
......@@ -151,13 +151,13 @@ public:
struct TreeConfig: public ConfigBase {
public:
int min_data_in_leaf = 100;
float min_sum_hessian_in_leaf = 10.0f;
double min_sum_hessian_in_leaf = 10.0f;
// should be > 1; a single leaf means there is nothing to learn
int num_leaves = 127;
int feature_fraction_seed = 2;
float feature_fraction = 1.0f;
double feature_fraction = 1.0f;
// max cache size (unit: MB) for the historical histogram; < 0 means no limit
float histogram_pool_size = -1.0f;
double histogram_pool_size = -1.0f;
// max depth of tree model.
// Still grows the tree leaf-wise, but limits the max depth to avoid over-fitting
// The max number of leaves will be min(num_leaves, pow(2, max_depth - 1))
......@@ -179,8 +179,8 @@ public:
int output_freq = 1;
bool is_provide_training_metric = false;
int num_iterations = 10;
float learning_rate = 0.1f;
float bagging_fraction = 1.0f;
double learning_rate = 0.1f;
double bagging_fraction = 1.0f;
int bagging_seed = 3;
int bagging_freq = 0;
int early_stopping_round = 0;
......@@ -268,12 +268,12 @@ inline bool ConfigBase::GetInt(
return false;
}
inline bool ConfigBase::GetFloat(
inline bool ConfigBase::GetDouble(
const std::unordered_map<std::string, std::string>& params,
const std::string& name, float* out) {
const std::string& name, double* out) {
if (params.count(name) > 0) {
if (!Common::AtofAndCheck(params.at(name).c_str(), out)) {
Log::Fatal("Parameter %s should be float type, passed is [%s]",
Log::Fatal("Parameter %s should be double type, passed is [%s]",
name.c_str(), params.at(name).c_str());
}
return true;
......
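For context, the GetFloat → GetDouble rename keeps the same lookup-and-parse pattern. A standalone sketch using std::strtod in place of Common::AtofAndCheck, simplified to a bool return instead of Log::Fatal (names here are hypothetical stand-ins):

#include <cstdlib>
#include <string>
#include <unordered_map>

// Parses params[name] as a double and reports success; *out is left
// untouched when the key is absent or the value is not a clean number.
inline bool GetDoubleSketch(const std::unordered_map<std::string, std::string>& params,
                            const std::string& name, double* out) {
  auto it = params.find(name);
  if (it == params.end()) return false;
  char* end = nullptr;
  const double v = std::strtod(it->second.c_str(), &end);
  if (end == it->second.c_str() || *end != '\0') return false;
  *out = v;
  return true;
}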
......@@ -227,7 +227,7 @@ public:
* \param out_label The label is stored here if it exists
*/
virtual void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const = 0;
std::vector<std::pair<int, double>>* out_features, double* out_label) const = 0;
/*!
* \brief Create an object of parser; the format is chosen automatically depending on the file
......@@ -240,7 +240,7 @@ public:
};
using PredictFunction =
std::function<float(const std::vector<std::pair<int, float>>&)>;
std::function<double(const std::vector<std::pair<int, double>>&)>;
/*! \brief The main class of data set,
* which is used for training or validation
......
......@@ -71,7 +71,7 @@ public:
* \param idx Index of record
* \param value feature value of record
*/
inline void PushData(int tid, data_size_t line_idx, float value) {
inline void PushData(int tid, data_size_t line_idx, double value) {
unsigned int bin = bin_mapper_->ValueToBin(value);
bin_data_->Push(tid, line_idx, bin);
}
......@@ -89,7 +89,7 @@ public:
* \param bin
* \return Feature value of this bin
*/
inline float BinToValue(unsigned int bin)
inline double BinToValue(unsigned int bin)
const { return bin_mapper_->BinToValue(bin); }
/*!
......
......@@ -34,7 +34,7 @@ public:
* \brief Calculating and printing metric results
* \param score Current prediction score
*/
virtual std::vector<float> Eval(const score_t* score) const = 0;
virtual std::vector<double> Eval(const score_t* score) const = 0;
/*!
* \brief Create object of metrics
......@@ -54,7 +54,7 @@ public:
* \brief Initialization logic
* \param label_gain Gain for labels, default is 2^i - 1
*/
static void Init(std::vector<float> label_gain);
static void Init(std::vector<double> label_gain);
/*!
* \brief Calculate the DCG score at position k
......@@ -64,7 +64,7 @@ public:
* \param num_data Number of data
* \return The DCG score
*/
static float CalDCGAtK(data_size_t k, const float* label,
static score_t CalDCGAtK(data_size_t k, const float* label,
const score_t* score, data_size_t num_data);
/*!
......@@ -77,7 +77,7 @@ public:
*/
static void CalDCG(const std::vector<data_size_t>& ks,
const float* label, const score_t* score,
data_size_t num_data, std::vector<float>* out);
data_size_t num_data, std::vector<score_t>* out);
/*!
* \brief Calculate the Max DCG score at position k
......@@ -86,7 +86,7 @@ public:
* \param num_data Number of data
* \return The max DCG score
*/
static float CalMaxDCGAtK(data_size_t k,
static score_t CalMaxDCGAtK(data_size_t k,
const float* label, data_size_t num_data);
/*!
......@@ -97,22 +97,22 @@ public:
* \param out Output result
*/
static void CalMaxDCG(const std::vector<data_size_t>& ks,
const float* label, data_size_t num_data, std::vector<float>* out);
const float* label, data_size_t num_data, std::vector<score_t>* out);
/*!
* \brief Get discount score of position k
* \param k The position
* \return The discount of this position
*/
inline static float GetDiscount(data_size_t k) { return discount_[k]; }
inline static score_t GetDiscount(data_size_t k) { return discount_[k]; }
private:
/*! \brief True if initialized, to avoid initializing multiple times */
static bool is_inited_;
/*! \brief store gains for different label */
static std::vector<float> label_gain_;
static std::vector<score_t> label_gain_;
/*! \brief store discount score for different position */
static std::vector<float> discount_;
static std::vector<score_t> discount_;
/*! \brief max position for eval */
static const data_size_t kMaxPosition;
};
......
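The DCG helpers above take labels sorted by predicted score and combine label_gain with a positional discount. A hedged sketch of DCG@k under the defaults described in this header (label_gain[l] = 2^l - 1, discount 1/log2(pos + 2)); this is illustrative, not the commit's exact implementation:

#include <algorithm>
#include <cmath>
#include <vector>

// sorted_labels must already be ordered by score, descending;
// label_gain maps a relevance label to its gain.
inline double DCGAtKSketch(int k, const std::vector<int>& sorted_labels,
                           const std::vector<double>& label_gain) {
  double dcg = 0.0;
  const int n = std::min(k, static_cast<int>(sorted_labels.size()));
  for (int i = 0; i < n; ++i) {
    dcg += label_gain[sorted_labels[i]] / std::log2(i + 2.0);  // positions are 0-based
  }
  return dcg;
}

Rather than calling log2 per lookup, the real class precomputes discount_ up to kMaxPosition, which GetDiscount then indexes directly.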
......@@ -36,7 +36,7 @@ public:
* This function is used for the prediction task; if a sigmoid param is set, the prediction value will be transformed by the sigmoid function.
* \return Sigmoid param; <= 0.0 means no sigmoid transform is used for this objective.
*/
virtual float GetSigmoid() const = 0;
virtual score_t GetSigmoid() const = 0;
/*!
* \brief Create object of objective function
......
......@@ -36,18 +36,18 @@ public:
* \param feature Index of feature; the converted index after removing useless features
* \param threshold Threshold(bin) of split
* \param real_feature Index of feature, the original index on data
* \param threshold_float Threshold on feature value
* \param threshold_double Threshold on feature value
* \param left_value Model Left child output
* \param right_value Model Right child output
* \param gain Split gain
* \return The index of new leaf.
*/
int Split(int leaf, int feature, unsigned int threshold, int real_feature,
float threshold_float, float left_value,
float right_value, float gain);
double threshold_double, double left_value,
double right_value, double gain);
/*! \brief Get the output of one leaf */
inline float LeafOutput(int leaf) const { return leaf_value_[leaf]; }
inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
/*!
* \brief Adding prediction value of this tree model to scores
......@@ -74,8 +74,8 @@ public:
* \param feature_values Feature value of this record
* \return Prediction result
*/
inline float Predict(const float* feature_values) const;
inline int PredictLeafIndex(const float* feature_values) const;
inline double Predict(const double* feature_values) const;
inline int PredictLeafIndex(const double* feature_values) const;
/*! \brief Get Number of leaves*/
inline int num_leaves() const { return num_leaves_; }
......@@ -91,7 +91,7 @@ public:
* shrinkage rate (a.k.a. learning rate) is used to tune the training process
* \param rate The factor of shrinkage
*/
inline void Shrinkage(float rate) {
inline void Shrinkage(double rate) {
for (int i = 0; i < num_leaves_; ++i) {
leaf_value_[i] = leaf_value_[i] * rate;
}
......@@ -119,7 +119,7 @@ private:
* \param feature_values Feature value of this record
* \return Leaf index
*/
inline int GetLeaf(const float* feature_values) const;
inline int GetLeaf(const double* feature_values) const;
/*! \brief Maximum number of leaves */
int max_leaves_;
......@@ -137,25 +137,25 @@ private:
/*! \brief A non-leaf node's split threshold in bin */
unsigned int* threshold_in_bin_;
/*! \brief A non-leaf node's split threshold in feature value */
float* threshold_;
double* threshold_;
/*! \brief A non-leaf node's split gain */
float* split_gain_;
double* split_gain_;
// used for leaf node
/*! \brief The parent of leaf */
int* leaf_parent_;
/*! \brief Output of leaves */
float* leaf_value_;
double* leaf_value_;
/*! \brief Depth for leaves */
int* leaf_depth_;
};
inline float Tree::Predict(const float* feature_values) const {
inline double Tree::Predict(const double* feature_values) const {
int leaf = GetLeaf(feature_values);
return LeafOutput(leaf);
}
inline int Tree::PredictLeafIndex(const float* feature_values) const {
inline int Tree::PredictLeafIndex(const double* feature_values) const {
int leaf = GetLeaf(feature_values);
return leaf;
}
......@@ -174,7 +174,7 @@ inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
return ~node;
}
inline int Tree::GetLeaf(const float* feature_values) const {
inline int Tree::GetLeaf(const double* feature_values) const {
int node = 0;
while (node >= 0) {
if (feature_values[split_feature_real_[node]] <= threshold_[node]) {
......
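GetLeaf above walks the flattened tree: internal nodes carry non-negative indices, and child fields store ~leaf_index (bitwise NOT) for leaves, so the loop exits once node goes negative and return ~node decodes the leaf, matching the iterator variant earlier in the hunk. A self-contained sketch with hypothetical stand-ins for the Tree members:

// Flat-array tree traversal in the style of Tree::GetLeaf.
struct FlatTreeSketch {
  const int* split_feature_real_;  // feature index tested at each internal node
  const double* threshold_;        // split threshold at each internal node
  const int* left_child_;          // child index, or ~leaf_index if a leaf
  const int* right_child_;

  int GetLeaf(const double* feature_values) const {
    int node = 0;
    while (node >= 0) {
      if (feature_values[split_feature_real_[node]] <= threshold_[node]) {
        node = left_child_[node];
      } else {
        node = right_child_[node];
      }
    }
    return ~node;  // decode the leaf index
  }
};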
......@@ -102,10 +102,9 @@ inline static const char* Atoi(const char* p, int* out) {
return p;
}
//ref to http://www.leapsecond.com/tools/fast_atof.c
inline static const char* Atof(const char* p, float* out) {
inline static const char* Atof(const char* p, double* out) {
int frac;
float sign, value, scale;
double sign, value, scale;
*out = 0;
// Skip leading white space, if any.
while (*p == ' ') {
......@@ -113,9 +112,9 @@ inline static const char* Atof(const char* p, float* out) {
}
// Get sign, if any.
sign = 1.0f;
sign = 1.0;
if (*p == '-') {
sign = -1.0f;
sign = -1.0;
++p;
} else if (*p == '+') {
++p;
......@@ -124,24 +123,24 @@ inline static const char* Atof(const char* p, float* out) {
// is a number
if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
// Get digits before decimal point or exponent, if any.
for (value = 0.0f; *p >= '0' && *p <= '9'; ++p) {
value = value * 10.0f + (*p - '0');
for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
value = value * 10.0 + (*p - '0');
}
// Get digits after decimal point, if any.
if (*p == '.') {
float pow10 = 10.0f;
double pow10 = 10.0;
++p;
while (*p >= '0' && *p <= '9') {
value += (*p - '0') / pow10;
pow10 *= 10.0f;
pow10 *= 10.0;
++p;
}
}
// Handle exponent, if any.
frac = 0;
scale = 1.0f;
scale = 1.0;
if ((*p == 'e') || (*p == 'E')) {
unsigned int expon;
// Get sign of exponent, if any.
......@@ -156,9 +155,11 @@ inline static const char* Atof(const char* p, float* out) {
for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
expon = expon * 10 + (*p - '0');
}
if (expon > 38) expon = 38;
if (expon > 308) expon = 308;
// Calculate scaling factor.
while (expon >= 50) { scale *= 1E50; expon -= 50; }
while (expon >= 8) { scale *= 1E8; expon -= 8; }
while (expon > 0) { scale *= 10.0f; expon -= 1; }
while (expon > 0) { scale *= 10.0; expon -= 1; }
}
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
......@@ -174,9 +175,9 @@ inline static const char* Atof(const char* p, float* out) {
std::string tmp_str(p, cnt);
std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
*out = 0.0f;
*out = 0;
} else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
*out = sign * static_cast<float>(1e38);
*out = sign * 1e308;
} else {
Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
}
......@@ -191,6 +192,8 @@ inline static const char* Atof(const char* p, float* out) {
return p;
}
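A hedged usage sketch for the hand-rolled Atof above (assuming the header is included so Common::Atof is visible): it returns a pointer just past the parsed token, so callers can walk a delimited line, and the na/nan and inf/infinity branches behave as shown in the hunk:

#include <cassert>
#include <cmath>

void AtofUsageSketch() {
  double v = 0.0;
  const char* rest = Common::Atof("3.14", &v);
  assert(std::fabs(v - 3.14) < 1e-9);  // limited precision vs. strtod
  assert(*rest == '\0');               // the whole token was consumed
  Common::Atof("nan", &v);
  assert(v == 0.0);                    // "na"/"nan" are mapped to 0
  Common::Atof("-inf", &v);
  assert(v == -1e308);                 // "inf"/"infinity" map to +/-1e308
}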
inline bool AtoiAndCheck(const char* p, int* out) {
const char* after = Atoi(p, out);
if (*after != '\0') {
......@@ -199,7 +202,7 @@ inline bool AtoiAndCheck(const char* p, int* out) {
return true;
}
inline bool AtofAndCheck(const char* p, float* out) {
inline bool AtofAndCheck(const char* p, double* out) {
const char* after = Atof(p, out);
if (*after != '\0') {
return false;
......@@ -260,10 +263,11 @@ inline static void StringToIntArray(const std::string& str, char delimiter, size
}
}
inline static void StringToFloatArray(const std::string& str, char delimiter, size_t n, float* out) {
inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, double* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Fatal("StringToFloatArray error, size doesn't matched.");
Log::Fatal("StringToDoubleArray error, size doesn't matched.");
}
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
......@@ -271,12 +275,12 @@ inline static void StringToFloatArray(const std::string& str, char delimiter, si
}
}
inline static std::vector<float> StringToFloatArray(const std::string& str, char delimiter) {
inline static std::vector<double> StringToDoubleArray(const std::string& str, char delimiter) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
std::vector<float> ret;
std::vector<double> ret;
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
float val = 0.0f;
double val = 0.0f;
Atof(strs[i].c_str(), &val);
ret.push_back(val);
}
......@@ -338,19 +342,19 @@ static inline int64_t Pow2RoundUp(int64_t x) {
* \brief Do an in-place softmax transformation on p_rec
* \param p_rec The input/output vector of the values.
*/
inline void Softmax(std::vector<float>* p_rec) {
std::vector<float> &rec = *p_rec;
float wmax = rec[0];
inline void Softmax(std::vector<double>* p_rec) {
std::vector<double> &rec = *p_rec;
double wmax = rec[0];
for (size_t i = 1; i < rec.size(); ++i) {
wmax = std::max(rec[i], wmax);
}
float wsum = 0.0f;
double wsum = 0.0f;
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] = std::exp(rec[i] - wmax);
wsum += rec[i];
}
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] /= static_cast<float>(wsum);
rec[i] /= static_cast<double>(wsum);
}
}
......
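Subtracting the row maximum before exponentiating keeps every exponent at or below zero, so large scores cannot overflow. A small usage sketch, assuming the function is reachable as Common::Softmax:

#include <cstdio>
#include <vector>

void SoftmaxUsageSketch() {
  std::vector<double> rec = {1000.0, 1001.0};  // naive exp(1000) would overflow
  Common::Softmax(&rec);
  std::printf("%f %f\n", rec[0], rec[1]);      // ~0.268941 0.731059
}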
......@@ -125,7 +125,7 @@ void Application::LoadData() {
if (boosting_->NumberOfSubModels() > 0) {
predictor = new Predictor(boosting_, config_.io_config.is_sigmoid, config_.predict_leaf_index, -1);
predict_fun =
[&predictor](const std::vector<std::pair<int, float>>& features) {
[&predictor](const std::vector<std::pair<int, double>>& features) {
return predictor->PredictRawOneLine(features);
};
}
......@@ -210,7 +210,7 @@ void Application::InitTrain() {
gbdt_config->tree_config.feature_fraction_seed =
GlobalSyncUpByMin<int>(gbdt_config->tree_config.feature_fraction_seed);
gbdt_config->tree_config.feature_fraction =
GlobalSyncUpByMin<float>(gbdt_config->tree_config.feature_fraction);
GlobalSyncUpByMin<double>(gbdt_config->tree_config.feature_fraction);
}
}
// create boosting
......
......@@ -39,9 +39,9 @@ public:
{
num_threads_ = omp_get_num_threads();
}
features_ = new float*[num_threads_];
features_ = new double*[num_threads_];
for (int i = 0; i < num_threads_; ++i) {
features_[i] = new float[num_features_];
features_[i] = new double[num_features_];
}
}
/*!
......@@ -61,7 +61,7 @@ public:
* \param features Feature for this record
* \return Prediction result
*/
float PredictRawOneLine(const std::vector<std::pair<int, float>>& features) {
double PredictRawOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result without sigmoid transformation
return boosting_->PredictRaw(features_[tid], num_used_model_);
......@@ -72,7 +72,7 @@ public:
* \param features Feature for this record
* \return Predicted leaf index
*/
std::vector<int> PredictLeafIndexOneLine(const std::vector<std::pair<int, float>>& features) {
std::vector<int> PredictLeafIndexOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result for leaf index
return boosting_->PredictLeafIndex(features_[tid], num_used_model_);
......@@ -83,7 +83,7 @@ public:
* \param features Feature of this record
* \return Prediction result
*/
float PredictOneLine(const std::vector<std::pair<int, float>>& features) {
double PredictOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result with sigmoid transform if needed
return boosting_->Predict(features_[tid], num_used_model_);
......@@ -94,7 +94,7 @@ public:
* \param features Feature of this record
* \return Prediction result
*/
std::vector<float> PredictMulticlassOneLine(const std::vector<std::pair<int, float>>& features) {
std::vector<double> PredictMulticlassOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result with sigmoid transform if needed
return boosting_->PredictMulticlass(features_[tid], num_used_model_);
......@@ -125,17 +125,17 @@ public:
}
// function to parse data
std::function<void(const char*, std::vector<std::pair<int, float>>*)> parser_fun;
float tmp_label;
std::function<void(const char*, std::vector<std::pair<int, double>>*)> parser_fun;
double tmp_label;
parser_fun = [this, &parser, &tmp_label]
(const char* buffer, std::vector<std::pair<int, float>>* feature) {
(const char* buffer, std::vector<std::pair<int, double>>* feature) {
parser->ParseOneLine(buffer, feature, &tmp_label);
};
std::function<std::string(const std::vector<std::pair<int, float>>&)> predict_fun;
std::function<std::string(const std::vector<std::pair<int, double>>&)> predict_fun;
if (num_class_ > 1) {
predict_fun = [this](const std::vector<std::pair<int, float>>& features){
std::vector<float> prediction = PredictMulticlassOneLine(features);
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
std::vector<double> prediction = PredictMulticlassOneLine(features);
std::stringstream result_stream_buf;
for (size_t i = 0; i < prediction.size(); ++i){
if (i > 0) {
......@@ -147,7 +147,7 @@ public:
};
}
else if (is_predict_leaf_index_) {
predict_fun = [this](const std::vector<std::pair<int, float>>& features){
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
std::vector<int> predicted_leaf_index = PredictLeafIndexOneLine(features);
std::stringstream result_stream_buf;
for (size_t i = 0; i < predicted_leaf_index.size(); ++i){
......@@ -161,12 +161,12 @@ public:
}
else {
if (is_simgoid_) {
predict_fun = [this](const std::vector<std::pair<int, float>>& features){
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
return std::to_string(PredictOneLine(features));
};
}
else {
predict_fun = [this](const std::vector<std::pair<int, float>>& features){
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
return std::to_string(PredictRawOneLine(features));
};
}
......@@ -174,7 +174,7 @@ public:
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &parser_fun, &predict_fun, &result_file]
(data_size_t, const std::vector<std::string>& lines) {
std::vector<std::pair<int, float>> oneline_features;
std::vector<std::pair<int, double>> oneline_features;
std::vector<std::string> pred_result(lines.size(), "");
#pragma omp parallel for schedule(static) private(oneline_features)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
......@@ -197,10 +197,10 @@ public:
}
private:
int PutFeatureValuesToBuffer(const std::vector<std::pair<int, float>>& features) {
int PutFeatureValuesToBuffer(const std::vector<std::pair<int, double>>& features) {
int tid = omp_get_thread_num();
// init feature value
std::memset(features_[tid], 0, sizeof(float)*num_features_);
std::memset(features_[tid], 0, sizeof(double)*num_features_);
// put feature value
for (const auto& p : features) {
if (p.first < num_features_) {
......@@ -212,7 +212,7 @@ private:
/*! \brief Boosting model */
const Boosting* boosting_;
/*! \brief Buffer for feature values */
float** features_;
double** features_;
/*! \brief Number of features */
int num_features_;
/*! \brief Number of classes */
......
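The predictor keeps one dense feature buffer per OpenMP thread and scatters each sparse (index, value) record into it before calling the boosting model. A single-threaded sketch of that scatter (hypothetical names; the real class indexes the buffer by omp_get_thread_num()):

#include <cstring>
#include <utility>
#include <vector>

// Zero the dense view, then copy sparse pairs in; indices beyond the
// trained feature space are dropped, mirroring the p.first < num_features_
// guard in PutFeatureValuesToBuffer.
void ScatterSketch(const std::vector<std::pair<int, double>>& features,
                   double* buffer, int num_features) {
  std::memset(buffer, 0, sizeof(double) * num_features);
  for (const auto& p : features) {
    if (p.first < num_features) {
      buffer[p.first] = p.second;
    }
  }
}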
......@@ -229,7 +229,7 @@ bool GBDT::OutputMetric(int iter) {
for (auto& sub_metric : training_metrics_) {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<float>(scores, ' ').c_str());
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(scores, ' ').c_str());
}
}
// print validation metric
......@@ -239,7 +239,7 @@ bool GBDT::OutputMetric(int iter) {
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName();
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<float>(test_scores, ' ').c_str());
Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(test_scores, ' ').c_str());
}
if (!ret && early_stopping_round_ > 0) {
bool the_bigger_the_better = valid_metrics_[i][j]->is_bigger_better();
......@@ -266,7 +266,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
std::stringstream str_buf;
str_buf << name << " : " << Common::ArrayToString<float>(scores, ' ');
str_buf << name << " : " << Common::ArrayToString<double>(scores, ' ');
ret.emplace_back(str_buf.str());
}
}
......@@ -276,7 +276,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
auto name = valid_metrics_[i][j]->GetName();
auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
std::stringstream str_buf;
str_buf << name << " : " << Common::ArrayToString<float>(test_scores, ' ');
str_buf << name << " : " << Common::ArrayToString<double>(test_scores, ' ');
ret.emplace_back(str_buf.str());
}
}
......@@ -420,7 +420,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
}
// if sigmoid doesn't exist
if (i == lines.size()) {
sigmoid_ = -1.0;
sigmoid_ = -1.0f;
}
// get tree models
i = 0;
......@@ -467,22 +467,22 @@ std::string GBDT::FeatureImportance() const {
return str_buf.str();
}
float GBDT::PredictRaw(const float* value, int num_used_model) const {
double GBDT::PredictRaw(const double* value, int num_used_model) const {
if (num_used_model < 0) {
num_used_model = static_cast<int>(models_.size());
}
float ret = 0.0f;
double ret = 0.0f;
for (int i = 0; i < num_used_model; ++i) {
ret += models_[i]->Predict(value);
}
return ret;
}
float GBDT::Predict(const float* value, int num_used_model) const {
double GBDT::Predict(const double* value, int num_used_model) const {
if (num_used_model < 0) {
num_used_model = static_cast<int>(models_.size());
}
float ret = 0.0f;
double ret = 0.0f;
for (int i = 0; i < num_used_model; ++i) {
ret += models_[i]->Predict(value);
}
......@@ -493,11 +493,11 @@ float GBDT::Predict(const float* value, int num_used_model) const {
return ret;
}
std::vector<float> GBDT::PredictMulticlass(const float* value, int num_used_model) const {
std::vector<double> GBDT::PredictMulticlass(const double* value, int num_used_model) const {
if (num_used_model < 0) {
num_used_model = static_cast<int>(models_.size()) / num_class_;
}
std::vector<float> ret(num_class_, 0.0f);
std::vector<double> ret(num_class_, 0.0f);
for (int i = 0; i < num_used_model; ++i) {
for (int j = 0; j < num_class_; ++j){
ret[j] += models_[i * num_class_ + j] -> Predict(value);
......@@ -507,7 +507,7 @@ std::vector<float> GBDT::PredictMulticlass(const float* value, int num_used_mode
return ret;
}
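PredictMulticlass above indexes models_[i * num_class_ + j]: each boosting iteration stores one tree per class back to back. An index-only sketch of that interleaved layout, where per_tree_output is a hypothetical flat array of per-tree leaf outputs for one record:

#include <vector>

std::vector<double> MulticlassSumSketch(int num_iterations, int num_class,
                                        const std::vector<double>& per_tree_output) {
  std::vector<double> ret(num_class, 0.0);
  for (int i = 0; i < num_iterations; ++i) {
    for (int j = 0; j < num_class; ++j) {
      ret[j] += per_tree_output[i * num_class + j];  // tree (iteration i, class j)
    }
  }
  return ret;
}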
std::vector<int> GBDT::PredictLeafIndex(const float* value, int num_used_model) const {
std::vector<int> GBDT::PredictLeafIndex(const double* value, int num_used_model) const {
if (num_used_model < 0) {
num_used_model = static_cast<int>(models_.size());
}
......
......@@ -58,7 +58,7 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
float PredictRaw(const float* feature_values, int num_used_model) const override;
double PredictRaw(const double* feature_values, int num_used_model) const override;
/*!
* \brief Prediction for one record with sigmoid transformation if enabled
......@@ -66,14 +66,14 @@ public:
* \param num_used_model Number of used model
* \return Prediction result for this record
*/
float Predict(const float* feature_values, int num_used_model) const override;
double Predict(const double* feature_values, int num_used_model) const override;
/*!
* \brief Prediction for multiclass classification
* \param feature_values Feature value on this record
* \return Prediction result, num_class numbers per line
*/
std::vector<float> PredictMulticlass(const float* value, int num_used_model) const override;
std::vector<double> PredictMulticlass(const double* value, int num_used_model) const override;
/*!
* \brief Prediction for one record with leaf index
......@@ -81,7 +81,7 @@ public:
* \param num_used_model Number of used model
* \return Predicted leaf index for this record
*/
std::vector<int> PredictLeafIndex(const float* value, int num_used_model) const override;
std::vector<int> PredictLeafIndex(const double* value, int num_used_model) const override;
/*!
* \brief Serialize models by string
......@@ -177,7 +177,7 @@ private:
int early_stopping_round_;
/*! \brief Best score(s) for early stopping */
std::vector<std::vector<int>> best_iter_;
std::vector<std::vector<score_t>> best_score_;
std::vector<std::vector<double>> best_score_;
/*! \brief Trained models(trees) */
std::vector<Tree*> models_;
/*! \brief Max feature index of training data*/
......@@ -204,7 +204,7 @@ private:
* \brief Sigmoid parameter, used for prediction.
* if > 0, the output score will be transformed by the sigmoid function
*/
float sigmoid_;
double sigmoid_;
/*! \brief Index of label column */
data_size_t label_idx_;
/*! \brief Saved number of models */
......
......@@ -24,7 +24,7 @@ BinMapper::BinMapper(const BinMapper& other)
num_bin_ = other.num_bin_;
is_trival_ = other.is_trival_;
sparse_rate_ = other.sparse_rate_;
bin_upper_bound_ = new float[num_bin_];
bin_upper_bound_ = new double[num_bin_];
for (int i = 0; i < num_bin_; ++i) {
bin_upper_bound_[i] = other.bin_upper_bound_[i];
}
......@@ -39,11 +39,11 @@ BinMapper::~BinMapper() {
delete[] bin_upper_bound_;
}
void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
std::vector<float>& ref_values = (*values);
void BinMapper::FindBin(std::vector<double>* values, int max_bin) {
std::vector<double>& ref_values = (*values);
size_t sample_size = values->size();
// find distinct_values first
std::vector<float> distinct_values;
std::vector<double> distinct_values;
std::vector<int> counts;
std::sort(ref_values.begin(), ref_values.end());
......@@ -63,21 +63,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
if (num_values <= max_bin) {
// use distinct value is enough
num_bin_ = num_values;
bin_upper_bound_ = new float[num_values];
bin_upper_bound_ = new double[num_values];
for (int i = 0; i < num_values - 1; ++i) {
bin_upper_bound_[i] = (distinct_values[i] + distinct_values[i + 1]) / 2;
}
cnt_in_bin0 = counts[0];
bin_upper_bound_[num_values - 1] = std::numeric_limits<float>::infinity();
bin_upper_bound_[num_values - 1] = std::numeric_limits<double>::infinity();
} else {
// mean size for one bin
float mean_bin_size = sample_size / static_cast<float>(max_bin);
double mean_bin_size = sample_size / static_cast<double>(max_bin);
int rest_sample_cnt = static_cast<int>(sample_size);
int bin_cnt = 0;
num_bin_ = max_bin;
std::vector<float> upper_bounds(max_bin, std::numeric_limits<float>::infinity());
std::vector<float> lower_bounds(max_bin, std::numeric_limits<float>::infinity());
std::vector<double> upper_bounds(max_bin, std::numeric_limits<double>::infinity());
std::vector<double> lower_bounds(max_bin, std::numeric_limits<double>::infinity());
// sort by count, descending
Common::SortForPair(counts, distinct_values, 0, true);
// fetch big slot as unique bin
......@@ -90,8 +90,8 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
// process the remaining bins
if (bin_cnt < max_bin) {
// sort rest by values
Common::SortForPair<float, int>(distinct_values, counts, bin_cnt, false);
mean_bin_size = rest_sample_cnt / static_cast<float>(max_bin - bin_cnt);
Common::SortForPair<double, int>(distinct_values, counts, bin_cnt, false);
mean_bin_size = rest_sample_cnt / static_cast<double>(max_bin - bin_cnt);
lower_bounds[bin_cnt] = distinct_values[bin_cnt];
int cur_cnt_inbin = 0;
for (int i = bin_cnt; i < num_values - 1; ++i) {
......@@ -105,21 +105,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
lower_bounds[bin_cnt] = distinct_values[i + 1];
if (bin_cnt >= max_bin - 1) break;
cur_cnt_inbin = 0;
mean_bin_size = rest_sample_cnt / static_cast<float>(max_bin - bin_cnt);
mean_bin_size = rest_sample_cnt / static_cast<double>(max_bin - bin_cnt);
}
}
cur_cnt_inbin += counts[num_values - 1];
}
Common::SortForPair<float, float>(lower_bounds, upper_bounds, 0, false);
Common::SortForPair<double, double>(lower_bounds, upper_bounds, 0, false);
// update bin upper bound
bin_upper_bound_ = new float[bin_cnt];
bin_upper_bound_ = new double[bin_cnt];
num_bin_ = bin_cnt;
for (int i = 0; i < bin_cnt - 1; ++i) {
bin_upper_bound_[i] = (upper_bounds[i] + lower_bounds[i + 1]) / 2.0f;
}
// last bin upper bound
bin_upper_bound_[bin_cnt - 1] = std::numeric_limits<float>::infinity();
bin_upper_bound_[bin_cnt - 1] = std::numeric_limits<double>::infinity();
}
// check for a trivial (num_bin_ == 1) feature
if (num_bin_ <= 1) {
......@@ -128,7 +128,7 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
is_trival_ = false;
}
// calculate sparse rate
sparse_rate_ = static_cast<float>(cnt_in_bin0) / static_cast<float>(sample_size);
sparse_rate_ = static_cast<double>(cnt_in_bin0) / static_cast<double>(sample_size);
}
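When there are more distinct values than bins, FindBin first gives high-count values their own bins and then spreads the rest so each bin holds roughly mean_bin_size samples. A simplified equal-frequency sketch that omits the big-slot pass and the re-sorting the real code performs:

#include <limits>
#include <vector>

// distinct_values must be sorted ascending, with counts[i] the number of
// samples equal to distinct_values[i]; returns ascending bin upper bounds.
std::vector<double> EqualFreqBoundsSketch(const std::vector<double>& distinct_values,
                                          const std::vector<int>& counts,
                                          int max_bin, int sample_size) {
  std::vector<double> bounds;
  const double mean_bin_size = sample_size / static_cast<double>(max_bin);
  double in_bin = 0.0;
  for (size_t i = 0; i + 1 < distinct_values.size(); ++i) {
    in_bin += counts[i];
    if (in_bin >= mean_bin_size && static_cast<int>(bounds.size()) < max_bin - 1) {
      // boundary halfway between neighbouring distinct values
      bounds.push_back((distinct_values[i] + distinct_values[i + 1]) / 2.0);
      in_bin = 0.0;
    }
  }
  bounds.push_back(std::numeric_limits<double>::infinity());  // catch-all last bin
  return bounds;
}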
......@@ -136,8 +136,8 @@ int BinMapper::SizeForSpecificBin(int bin) {
int size = 0;
size += sizeof(int);
size += sizeof(bool);
size += sizeof(float);
size += bin * sizeof(float);
size += sizeof(double);
size += bin * sizeof(double);
return size;
}
......@@ -148,7 +148,7 @@ void BinMapper::CopyTo(char * buffer) {
buffer += sizeof(is_trival_);
std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_));
buffer += sizeof(sparse_rate_);
std::memcpy(buffer, bin_upper_bound_, num_bin_ * sizeof(float));
std::memcpy(buffer, bin_upper_bound_, num_bin_ * sizeof(double));
}
void BinMapper::CopyFrom(const char * buffer) {
......@@ -159,19 +159,19 @@ void BinMapper::CopyFrom(const char * buffer) {
std::memcpy(&sparse_rate_, buffer, sizeof(sparse_rate_));
buffer += sizeof(sparse_rate_);
if (bin_upper_bound_ != nullptr) { delete[] bin_upper_bound_; }
bin_upper_bound_ = new float[num_bin_];
std::memcpy(bin_upper_bound_, buffer, num_bin_ * sizeof(float));
bin_upper_bound_ = new double[num_bin_];
std::memcpy(bin_upper_bound_, buffer, num_bin_ * sizeof(double));
}
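CopyTo/CopyFrom serialize the mapper by raw memcpy: fixed-size members first, then the bounds array whose length num_bin_ was read just before it. A round-trip sketch with a hypothetical two-field record; this works only for trivially copyable members, and assumes writer and reader agree on layout and endianness:

#include <cstring>
#include <vector>

struct RecordSketch {
  int num_bin = 0;
  std::vector<double> upper_bounds;

  void CopyTo(char* buffer) const {
    std::memcpy(buffer, &num_bin, sizeof(num_bin));
    buffer += sizeof(num_bin);
    std::memcpy(buffer, upper_bounds.data(), num_bin * sizeof(double));
  }
  void CopyFrom(const char* buffer) {
    std::memcpy(&num_bin, buffer, sizeof(num_bin));
    buffer += sizeof(num_bin);
    upper_bounds.resize(num_bin);
    std::memcpy(upper_bounds.data(), buffer, num_bin * sizeof(double));
  }
};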
void BinMapper::SaveBinaryToFile(FILE* file) const {
fwrite(&num_bin_, sizeof(num_bin_), 1, file);
fwrite(&is_trival_, sizeof(is_trival_), 1, file);
fwrite(&sparse_rate_, sizeof(sparse_rate_), 1, file);
fwrite(bin_upper_bound_, sizeof(float), num_bin_, file);
fwrite(bin_upper_bound_, sizeof(double), num_bin_, file);
}
size_t BinMapper::SizesInByte() const {
return sizeof(num_bin_) + sizeof(is_trival_) + sizeof(sparse_rate_) + sizeof(float) * num_bin_;
return sizeof(num_bin_) + sizeof(is_trival_) + sizeof(sparse_rate_) + sizeof(double) * num_bin_;
}
template class DenseBin<uint8_t>;
......@@ -187,9 +187,9 @@ template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>;
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, float sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
// sparse threshold
const float kSparseThreshold = 0.8f;
const double kSparseThreshold = 0.8f;
if (sparse_rate >= kSparseThreshold && is_enable_sparse) {
*is_sparse = true;
return CreateSparseBin(num_data, num_bin, default_bin);
......
......@@ -216,38 +216,38 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetBool(params, "is_unbalance", &is_unbalance);
GetFloat(params, "sigmoid", &sigmoid);
GetDouble(params, "sigmoid", &sigmoid);
GetInt(params, "max_position", &max_position);
CHECK(max_position > 0);
GetInt(params, "num_class", &num_class);
CHECK(num_class >= 1);
std::string tmp_str = "";
if (GetString(params, "label_gain", &tmp_str)) {
label_gain = Common::StringToFloatArray(tmp_str, ',');
label_gain = Common::StringToDoubleArray(tmp_str, ',');
} else {
// label_gain = 2^i - 1, may overflow, so we use 31 here
const int max_label = 31;
label_gain.push_back(0.0f);
for (int i = 1; i < max_label; ++i) {
label_gain.push_back(static_cast<float>((1 << i) - 1));
label_gain.push_back(static_cast<double>((1 << i) - 1));
}
}
}
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetFloat(params, "sigmoid", &sigmoid);
GetDouble(params, "sigmoid", &sigmoid);
GetInt(params, "num_class", &num_class);
CHECK(num_class >= 1);
std::string tmp_str = "";
if (GetString(params, "label_gain", &tmp_str)) {
label_gain = Common::StringToFloatArray(tmp_str, ',');
label_gain = Common::StringToDoubleArray(tmp_str, ',');
} else {
// label_gain = 2^i - 1, may overflow, so we use 31 here
const int max_label = 31;
label_gain.push_back(0.0f);
for (int i = 1; i < max_label; ++i) {
label_gain.push_back(static_cast<float>((1 << i) - 1));
label_gain.push_back(static_cast<double>((1 << i) - 1));
}
}
if (GetString(params, "ndcg_eval_at", &tmp_str)) {
......@@ -267,14 +267,14 @@ void MetricConfig::Set(const std::unordered_map<std::string, std::string>& param
void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
GetFloat(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
GetInt(params, "num_leaves", &num_leaves);
CHECK(num_leaves > 1);
GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
GetFloat(params, "feature_fraction", &feature_fraction);
GetDouble(params, "feature_fraction", &feature_fraction);
CHECK(feature_fraction > 0.0f && feature_fraction <= 1.0f);
GetFloat(params, "histogram_pool_size", &histogram_pool_size);
GetDouble(params, "histogram_pool_size", &histogram_pool_size);
GetInt(params, "max_depth", &max_depth);
CHECK(max_depth > 1 || max_depth < 0);
}
......@@ -286,9 +286,9 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
GetInt(params, "bagging_seed", &bagging_seed);
GetInt(params, "bagging_freq", &bagging_freq);
CHECK(bagging_freq >= 0);
GetFloat(params, "bagging_fraction", &bagging_fraction);
GetDouble(params, "bagging_fraction", &bagging_fraction);
CHECK(bagging_fraction > 0.0f && bagging_fraction <= 1.0f);
GetFloat(params, "learning_rate", &learning_rate);
GetDouble(params, "learning_rate", &learning_rate);
CHECK(learning_rate > 0.0f);
GetInt(params, "early_stopping_round", &early_stopping_round);
CHECK(early_stopping_round >= 0);
......
......@@ -364,10 +364,10 @@ void Dataset::SetField(const char* field_name, const void* field_data, data_size
void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<std::string>& sample_data) {
// sample_values[i][j] is the value of the j-th sample for the i-th feature
std::vector<std::vector<float>> sample_values;
std::vector<std::vector<double>> sample_values;
// temp buffer for one line's features and label
std::vector<std::pair<int, float>> oneline_features;
float label;
std::vector<std::pair<int, double>> oneline_features;
double label;
for (size_t i = 0; i < sample_data.size(); ++i) {
oneline_features.clear();
// parse features
......@@ -376,7 +376,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
for (auto& feature_values : sample_values) {
feature_values.push_back(0.0);
}
for (std::pair<int, float>& inner_data : oneline_features) {
for (std::pair<int, double>& inner_data : oneline_features) {
if (static_cast<size_t>(inner_data.first) >= sample_values.size()) {
// expand the feature set if needed
size_t need_size = inner_data.first - sample_values.size() + 1;
......@@ -571,6 +571,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
used_feature_map_ = train_set->used_feature_map_;
num_features_ = static_cast<int>(features_.size());
num_total_features_ = train_set->num_total_features_;
feature_names_ = train_set->feature_names_;
// extract features
ExtractFeaturesFromMemory();
} else {
......@@ -585,6 +587,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
used_feature_map_ = train_set->used_feature_map_;
num_features_ = static_cast<int>(features_.size());
num_total_features_ = train_set->num_total_features_;
feature_names_ = train_set->feature_names_;
// extract features
ExtractFeaturesFromFile();
}
......@@ -601,8 +605,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
}
void Dataset::ExtractFeaturesFromMemory() {
std::vector<std::pair<int, float>> oneline_features;
float tmp_label = 0.0f;
std::vector<std::pair<int, double>> oneline_features;
double tmp_label = 0.0f;
if (predict_fun_ == nullptr) {
// if prediction with an initial model is not needed
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
......@@ -612,7 +616,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// parser
parser_->ParseOneLine(text_reader_->Lines()[i].c_str(), &oneline_features, &tmp_label);
// set label
metadata_.SetLabelAt(i, tmp_label);
metadata_.SetLabelAt(i, static_cast<float>(tmp_label));
// free processed line:
text_reader_->Lines()[i].clear();
// shrink_to_fit is very slow on Linux and does not seem to free memory; disabled for now
......@@ -626,9 +630,9 @@ void Dataset::ExtractFeaturesFromMemory() {
}
else {
if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(i, inner_data.second);
metadata_.SetWeightAt(i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(i, inner_data.second);
metadata_.SetQueryAt(i, static_cast<float>(inner_data.second));
}
}
}
......@@ -645,7 +649,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// set initial score
init_score[i] = static_cast<float>(predict_fun_(oneline_features));
// set label
metadata_.SetLabelAt(i, tmp_label);
metadata_.SetLabelAt(i, static_cast<float>(tmp_label));
// free processed line:
text_reader_->Lines()[i].clear();
// shrink_to_fit is very slow on Linux and does not seem to free memory; disabled for now
......@@ -659,9 +663,9 @@ void Dataset::ExtractFeaturesFromMemory() {
}
else {
if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(i, inner_data.second);
metadata_.SetWeightAt(i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(i, inner_data.second);
metadata_.SetQueryAt(i, static_cast<float>(inner_data.second));
}
}
}
......@@ -688,8 +692,8 @@ void Dataset::ExtractFeaturesFromFile() {
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &init_score]
(data_size_t start_idx, const std::vector<std::string>& lines) {
std::vector<std::pair<int, float>> oneline_features;
float tmp_label = 0.0f;
std::vector<std::pair<int, double>> oneline_features;
double tmp_label = 0.0f;
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
const int tid = omp_get_thread_num();
......@@ -701,7 +705,7 @@ void Dataset::ExtractFeaturesFromFile() {
init_score[start_idx + i] = static_cast<float>(predict_fun_(oneline_features));
}
// set label
metadata_.SetLabelAt(start_idx + i, tmp_label);
metadata_.SetLabelAt(start_idx + i, static_cast<float>(tmp_label));
// push data
for (auto& inner_data : oneline_features) {
int feature_idx = used_feature_map_[inner_data.first];
......@@ -711,9 +715,9 @@ void Dataset::ExtractFeaturesFromFile() {
}
else {
if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(start_idx + i, inner_data.second);
metadata_.SetWeightAt(start_idx + i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(start_idx + i, inner_data.second);
metadata_.SetQueryAt(start_idx + i, static_cast<float>(inner_data.second));
}
}
}
......@@ -763,7 +767,11 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
// get size of header
size_t size_of_header = sizeof(global_num_data_) + sizeof(is_enable_sparse_)
+ sizeof(max_bin_) + sizeof(num_data_) + sizeof(num_features_) + sizeof(size_t) + sizeof(int) * used_feature_map_.size();
+ sizeof(max_bin_) + sizeof(num_data_) + sizeof(num_features_) + sizeof(num_total_features_) + sizeof(size_t) + sizeof(int) * used_feature_map_.size();
// size of feature names
for (int i = 0; i < num_total_features_; ++i) {
size_of_header += feature_names_[i].size() + sizeof(int);
}
fwrite(&size_of_header, sizeof(size_of_header), 1, file);
// write header
fwrite(&global_num_data_, sizeof(global_num_data_), 1, file);
......@@ -771,10 +779,19 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
fwrite(&max_bin_, sizeof(max_bin_), 1, file);
fwrite(&num_data_, sizeof(num_data_), 1, file);
fwrite(&num_features_, sizeof(num_features_), 1, file);
fwrite(&num_total_features_, sizeof(num_total_features_), 1, file);
size_t num_used_feature_map = used_feature_map_.size();
fwrite(&num_used_feature_map, sizeof(num_used_feature_map), 1, file);
fwrite(used_feature_map_.data(), sizeof(int), num_used_feature_map, file);
// write feature names
for (int i = 0; i < num_total_features_; ++i) {
int str_len = static_cast<int>(feature_names_[i].size());
fwrite(&str_len, sizeof(int), 1, file);
const char* c_str = feature_names_[i].c_str();
fwrite(c_str, sizeof(char), str_len, file);
}
// get size of meta data
size_t size_of_metadata = metadata_.SizesInByte();
fwrite(&size_of_metadata, sizeof(size_of_metadata), 1, file);
......@@ -864,6 +881,8 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
mem_ptr += sizeof(num_data_);
num_features_ = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(num_features_);
num_total_features_ = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(num_total_features_);
size_t num_used_feature_map = *(reinterpret_cast<const size_t*>(mem_ptr));
mem_ptr += sizeof(num_used_feature_map);
const int* tmp_feature_map = reinterpret_cast<const int*>(mem_ptr);
......@@ -871,6 +890,21 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
for (size_t i = 0; i < num_used_feature_map; ++i) {
used_feature_map_.push_back(tmp_feature_map[i]);
}
mem_ptr += sizeof(int) * num_used_feature_map;
// get feature names
feature_names_.clear();
// read feature names
for (int i = 0; i < num_total_features_; ++i) {
int str_len = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(int);
std::stringstream str_buf;
for (int j = 0; j < str_len; ++j) {
char tmp_char = *(reinterpret_cast<const char*>(mem_ptr));
mem_ptr += sizeof(char);
str_buf << tmp_char;
}
feature_names_.emplace_back(str_buf.str());
}
// read size of meta data
read_cnt = fread(buffer, sizeof(size_t), 1, file);
......
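The feature names added to the binary header are length-prefixed: an int byte count followed by the raw characters. A write/read sketch of that format; constructing each string directly from the pointer is equivalent to the character-by-character stringstream loop above:

#include <cstring>
#include <string>
#include <vector>

size_t WriteNamesSketch(const std::vector<std::string>& names, char* out) {
  char* p = out;
  for (const auto& s : names) {
    const int len = static_cast<int>(s.size());
    std::memcpy(p, &len, sizeof(int));  p += sizeof(int);
    std::memcpy(p, s.data(), len);      p += len;
  }
  return static_cast<size_t>(p - out);  // bytes written
}

std::vector<std::string> ReadNamesSketch(const char* p, int count) {
  std::vector<std::string> names;
  for (int i = 0; i < count; ++i) {
    int len = 0;
    std::memcpy(&len, p, sizeof(int));  p += sizeof(int);
    names.emplace_back(p, len);         p += len;
  }
  return names;
}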
......@@ -281,9 +281,9 @@ void Metadata::LoadWeights() {
num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = new float[num_weights_];
for (data_size_t i = 0; i < num_weights_; ++i) {
float tmp_weight = 0.0f;
double tmp_weight = 0.0f;
Common::Atof(reader.Lines()[i].c_str(), &tmp_weight);
weights_[i] = tmp_weight;
weights_[i] = static_cast<float>(tmp_weight);
}
}
......@@ -296,10 +296,10 @@ void Metadata::LoadInitialScore() {
Log::Info("Start loading initial scores");
num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
init_score_ = new float[num_init_score_];
float tmp = 0.0f;
double tmp = 0.0f;
for (data_size_t i = 0; i < num_init_score_; ++i) {
Common::Atof(reader.Lines()[i].c_str(), &tmp);
init_score_[i] = tmp;
init_score_[i] = static_cast<float>(tmp);
}
}
......
......@@ -18,9 +18,9 @@ public:
:label_idx_(label_idx) {
}
inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override {
std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0;
float val = 0.0f;
double val = 0.0f;
int bias = 0;
*out_label = 0.0f;
while (*str != '\0') {
......@@ -50,9 +50,9 @@ public:
:label_idx_(label_idx) {
}
inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override {
std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0;
float val = 0.0f;
double val = 0.0f;
int bias = 0;
while (*str != '\0') {
str = Common::Atof(str, &val);
......@@ -83,9 +83,9 @@ public:
}
}
inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override {
std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0;
float val = 0.0f;
double val = 0.0f;
if (label_idx_ == 0) {
str = Common::Atof(str, &val);
*out_label = val;
......
......@@ -23,11 +23,11 @@ Tree::Tree(int max_leaves)
split_feature_ = new int[max_leaves_ - 1];
split_feature_real_ = new int[max_leaves_ - 1];
threshold_in_bin_ = new unsigned int[max_leaves_ - 1];
threshold_ = new float[max_leaves_ - 1];
split_gain_ = new float[max_leaves_ - 1];
threshold_ = new double[max_leaves_ - 1];
split_gain_ = new double[max_leaves_ - 1];
leaf_parent_ = new int[max_leaves_];
leaf_value_ = new float[max_leaves_];
leaf_value_ = new double[max_leaves_];
leaf_depth_ = new int[max_leaves_];
// root is at depth 1
leaf_depth_[0] = 1;
......@@ -48,7 +48,7 @@ Tree::~Tree() {
}
int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
float threshold, float left_value, float right_value, float gain) {
double threshold, double left_value, double right_value, double gain) {
int new_node_idx = num_leaves_ - 1;
// update parent info
int parent = leaf_parent_[leaf];
......@@ -89,7 +89,7 @@ void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(start));
}
for (data_size_t i = start; i < end; ++i) {
score[i] += leaf_value_[GetLeaf(iterators, i)];
score[i] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, i)]);
}
});
}
......@@ -103,7 +103,7 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start]));
}
for (data_size_t i = start; i < end; ++i) {
score[used_data_indices[i]] += leaf_value_[GetLeaf(iterators, used_data_indices[i])];
score[used_data_indices[i]] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, used_data_indices[i])]);
}
});
}
......@@ -114,9 +114,9 @@ std::string Tree::ToString() {
ss << "split_feature="
<< Common::ArrayToString<int>(split_feature_real_, num_leaves_ - 1, ' ') << std::endl;
ss << "split_gain="
<< Common::ArrayToString<float>(split_gain_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<double>(split_gain_, num_leaves_ - 1, ' ') << std::endl;
ss << "threshold="
<< Common::ArrayToString<float>(threshold_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<double>(threshold_, num_leaves_ - 1, ' ') << std::endl;
ss << "left_child="
<< Common::ArrayToString<int>(left_child_, num_leaves_ - 1, ' ') << std::endl;
ss << "right_child="
......@@ -124,7 +124,7 @@ std::string Tree::ToString() {
ss << "leaf_parent="
<< Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
ss << "leaf_value="
<< Common::ArrayToString<float>(leaf_value_, num_leaves_, ' ') << std::endl;
<< Common::ArrayToString<double>(leaf_value_, num_leaves_, ' ') << std::endl;
ss << std::endl;
return ss.str();
}
......@@ -154,10 +154,10 @@ Tree::Tree(const std::string& str) {
left_child_ = new int[num_leaves_ - 1];
right_child_ = new int[num_leaves_ - 1];
split_feature_real_ = new int[num_leaves_ - 1];
threshold_ = new float[num_leaves_ - 1];
split_gain_ = new float[num_leaves_ - 1];
threshold_ = new double[num_leaves_ - 1];
split_gain_ = new double[num_leaves_ - 1];
leaf_parent_ = new int[num_leaves_];
leaf_value_ = new float[num_leaves_];
leaf_value_ = new double[num_leaves_];
split_feature_ = nullptr;
threshold_in_bin_ = nullptr;
......@@ -165,9 +165,9 @@ Tree::Tree(const std::string& str) {
Common::StringToIntArray(key_vals["split_feature"], ' ',
num_leaves_ - 1, split_feature_real_);
Common::StringToFloatArray(key_vals["split_gain"], ' ',
Common::StringToDoubleArray(key_vals["split_gain"], ' ',
num_leaves_ - 1, split_gain_);
Common::StringToFloatArray(key_vals["threshold"], ' ',
Common::StringToDoubleArray(key_vals["threshold"], ' ',
num_leaves_ - 1, threshold_);
Common::StringToIntArray(key_vals["left_child"], ' ',
num_leaves_ - 1, left_child_);
......@@ -175,7 +175,7 @@ Tree::Tree(const std::string& str) {
num_leaves_ - 1, right_child_);
Common::StringToIntArray(key_vals["leaf_parent"], ' ',
num_leaves_ , leaf_parent_);
Common::StringToFloatArray(key_vals["leaf_value"], ' ',
Common::StringToDoubleArray(key_vals["leaf_value"], ' ',
num_leaves_ , leaf_value_);
}
......
......@@ -41,7 +41,7 @@ public:
weights_ = metadata.weights();
if (weights_ == nullptr) {
sum_weights_ = static_cast<float>(num_data_);
sum_weights_ = static_cast<double>(num_data_);
} else {
sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data; ++i) {
......@@ -58,8 +58,8 @@ public:
return false;
}
std::vector<float> Eval(const score_t* score) const override {
score_t sum_loss = 0.0f;
std::vector<double> Eval(const score_t* score) const override {
double sum_loss = 0.0f;
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -77,8 +77,8 @@ public:
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
}
}
score_t loss = sum_loss / sum_weights_;
return std::vector<float>(1, static_cast<float>(loss));
double loss = sum_loss / sum_weights_;
return std::vector<double>(1, loss);
}
private:
......@@ -89,7 +89,7 @@ private:
/*! \brief Pointer to weights */
const float* weights_;
/*! \brief Sum weights */
float sum_weights_;
double sum_weights_;
/*! \brief Name of test set */
std::string name_;
/*! \brief Sigmoid parameter */
......@@ -172,7 +172,7 @@ public:
weights_ = metadata.weights();
if (weights_ == nullptr) {
sum_weights_ = static_cast<float>(num_data_);
sum_weights_ = static_cast<double>(num_data_);
} else {
sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data; ++i) {
......@@ -181,7 +181,7 @@ public:
}
}
std::vector<float> Eval(const score_t* score) const override {
std::vector<double> Eval(const score_t* score) const override {
// get indices sorted by score, descending order
std::vector<data_size_t> sorted_idx;
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -189,13 +189,13 @@ public:
}
std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
// temp sum of positive labels
score_t cur_pos = 0.0f;
double cur_pos = 0.0f;
// total sum of positive labels
score_t sum_pos = 0.0f;
double sum_pos = 0.0f;
// accumulated AUC
score_t accum = 0.0f;
double accum = 0.0f;
// temp sum of negative labels
score_t cur_neg = 0.0f;
double cur_neg = 0.0f;
score_t threshold = score[sorted_idx[0]];
if (weights_ == nullptr) { // no weights
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -233,11 +233,11 @@ public:
}
accum += cur_neg*(cur_pos * 0.5f + sum_pos);
sum_pos += cur_pos;
score_t auc = 1.0f;
double auc = 1.0f;
if (sum_pos > 0.0f && sum_pos != sum_weights_) {
auc = accum / (sum_pos *(sum_weights_ - sum_pos));
}
return std::vector<float>(1, static_cast<float>(auc));
return std::vector<double>(1, auc);
}
private:
......@@ -248,7 +248,7 @@ private:
/*! \brief Pointer to weights */
const float* weights_;
/*! \brief Sum weights */
float sum_weights_;
double sum_weights_;
/*! \brief Name of test set */
std::string name_;
};
......
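The AUC Eval above sorts by score descending and, for each tie group, adds cur_neg * (cur_pos / 2 + sum_pos): every negative in the group pairs with the positives that scored strictly higher, and tied pairs count half. A compact unweighted sketch of the same accumulation, with 0/1 labels:

#include <algorithm>
#include <numeric>
#include <vector>

double AucSketch(const std::vector<double>& score, const std::vector<int>& label) {
  const size_t n = score.size();
  if (n == 0) return 1.0;
  std::vector<size_t> idx(n);
  std::iota(idx.begin(), idx.end(), 0);
  std::sort(idx.begin(), idx.end(),
            [&](size_t a, size_t b) { return score[a] > score[b]; });
  double sum_pos = 0.0, accum = 0.0, cur_pos = 0.0, cur_neg = 0.0;
  double threshold = score[idx[0]];
  for (size_t i = 0; i < n; ++i) {
    if (score[idx[i]] != threshold) {  // close the previous tie group
      threshold = score[idx[i]];
      accum += cur_neg * (cur_pos * 0.5 + sum_pos);
      sum_pos += cur_pos;
      cur_pos = cur_neg = 0.0;
    }
    if (label[idx[i]] > 0) cur_pos += 1.0; else cur_neg += 1.0;
  }
  accum += cur_neg * (cur_pos * 0.5 + sum_pos);  // final group
  sum_pos += cur_pos;
  const double sum_neg = static_cast<double>(n) - sum_pos;
  return (sum_pos > 0.0 && sum_neg > 0.0) ? accum / (sum_pos * sum_neg) : 1.0;
}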