Commit 3b50aeac authored by Guolin Ke

merge from master

parents 504d400c c96ae6af
@@ -56,7 +56,7 @@ public:
 /*! \brief True if bin is trival (contains only one bin) */
 inline bool is_trival() const { return is_trival_; }
 /*! \brief Sparsity of this bin ( num_zero_bins / num_data ) */
-inline float sparse_rate() const { return sparse_rate_; }
+inline double sparse_rate() const { return sparse_rate_; }
 /*!
 * \brief Save binary data to file
 * \param file File want to write
@@ -67,7 +67,7 @@ public:
 * \param bin
 * \return Feature value of this bin
 */
-inline float BinToValue(unsigned int bin) const {
+inline double BinToValue(unsigned int bin) const {
 return bin_upper_bound_[bin];
 }
 /*!
@@ -79,14 +79,14 @@ public:
 * \param value
 * \return bin for this feature value
 */
-inline unsigned int ValueToBin(float value) const;
+inline unsigned int ValueToBin(double value) const;
 /*!
 * \brief Construct feature value to bin mapper according feature values
 * \param values (Sampled) values of this feature
 * \param max_bin The maximal number of bin
 */
-void FindBin(std::vector<float>* values, int max_bin);
+void FindBin(std::vector<double>* values, int max_bin);
 /*!
 * \brief Use specific number of bin to calculate the size of this class
@@ -111,11 +111,11 @@ private:
 /*! \brief Number of bins */
 int num_bin_;
 /*! \brief Store upper bound for each bin */
-float* bin_upper_bound_;
+double* bin_upper_bound_;
 /*! \brief True if this feature is trival */
 bool is_trival_;
 /*! \brief Sparse rate of this bins( num_bin0/num_data ) */
-float sparse_rate_;
+double sparse_rate_;
 };
 /*!
@@ -271,7 +271,7 @@ public:
 * \return The bin data object
 */
 static Bin* CreateBin(data_size_t num_data, int num_bin,
-float sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
+double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
 /*!
 * \brief Create object for bin data of one feature, used for dense feature
@@ -293,7 +293,7 @@ public:
 int num_bin, int default_bin);
 };
-inline unsigned int BinMapper::ValueToBin(float value) const {
+inline unsigned int BinMapper::ValueToBin(double value) const {
 // binary search to find bin
 int l = 0;
 int r = num_bin_ - 1;
......
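Note (not part of this commit's diff): the hunk above ends before ValueToBin's loop body. As a hedged illustration of the declared behaviour only, a binary search over bin_upper_bound_ that returns the first bin whose upper bound covers the value could look like the standalone sketch below; the helper name and free-function parameters are ours, not the committed code.

// Hedged sketch mirroring the l = 0 / r = num_bin_ - 1 setup visible above.
inline unsigned int ValueToBinSketch(const double* bin_upper_bound, int num_bin, double value) {
  int l = 0;
  int r = num_bin - 1;
  while (l < r) {
    int m = (l + r) / 2;
    if (value > bin_upper_bound[m]) {
      l = m + 1;  // value lies above bin m's upper bound
    } else {
      r = m;      // bin m (or an earlier one) already covers value
    }
  }
  return static_cast<unsigned int>(l);
}
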
@@ -58,7 +58,7 @@ public:
 * \param num_used_model Number of used model
 * \return Prediction result for this record
 */
-virtual float PredictRaw(const float* feature_values,
+virtual double PredictRaw(const double* feature_values,
 int num_used_model) const = 0;
 /*!
@@ -67,7 +67,7 @@ public:
 * \param num_used_model Number of used model
 * \return Prediction result for this record
 */
-virtual float Predict(const float* feature_values,
+virtual double Predict(const double* feature_values,
 int num_used_model) const = 0;
 /*!
@@ -77,7 +77,7 @@ public:
 * \return Predicted leaf index for this record
 */
 virtual std::vector<int> PredictLeafIndex(
-const float* feature_values,
+const double* feature_values,
 int num_used_model) const = 0;
 /*!
@@ -85,7 +85,7 @@ public:
 * \param feature_values Feature value on this record
 * \return Prediction result, num_class numbers per line
 */
-virtual std::vector<float> PredictMulticlass(const float* value, int num_used_model) const = 0;
+virtual std::vector<double> PredictMulticlass(const double* value, int num_used_model) const = 0;
 /*!
 * \brief save model to file
......
@@ -49,15 +49,15 @@ public:
 const std::string& name, int* out);
 /*!
-* \brief Get float value by specific name of key
+* \brief Get double value by specific name of key
 * \param params Store the key and value for params
 * \param name Name of key
 * \param out Value will assign to out if key exists
 * \return True if key exists
 */
-inline bool GetFloat(
+inline bool GetDouble(
 const std::unordered_map<std::string, std::string>& params,
-const std::string& name, float* out);
+const std::string& name, double* out);
 /*!
 * \brief Get bool value by specific name of key
@@ -123,9 +123,9 @@ public:
 struct ObjectiveConfig: public ConfigBase {
 public:
 virtual ~ObjectiveConfig() {}
-float sigmoid = 1.0f;
+double sigmoid = 1.0f;
 // for lambdarank
-std::vector<float> label_gain;
+std::vector<double> label_gain;
 // for lambdarank
 int max_position = 20;
 // for binary
@@ -140,8 +140,8 @@ struct MetricConfig: public ConfigBase {
 public:
 virtual ~MetricConfig() {}
 int num_class = 1;
-float sigmoid = 1.0f;
-std::vector<float> label_gain;
+double sigmoid = 1.0f;
+std::vector<double> label_gain;
 std::vector<int> eval_at;
 void Set(const std::unordered_map<std::string, std::string>& params) override;
 };
@@ -151,13 +151,13 @@ public:
 struct TreeConfig: public ConfigBase {
 public:
 int min_data_in_leaf = 100;
-float min_sum_hessian_in_leaf = 10.0f;
+double min_sum_hessian_in_leaf = 10.0f;
 // should > 1, only one leaf means not need to learning
 int num_leaves = 127;
 int feature_fraction_seed = 2;
-float feature_fraction = 1.0f;
+double feature_fraction = 1.0f;
 // max cache size(unit:MB) for historical histogram. < 0 means not limit
-float histogram_pool_size = -1.0f;
+double histogram_pool_size = -1.0f;
 // max depth of tree model.
 // Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
 // And the max leaves will be min(num_leaves, pow(2, max_depth - 1))
@@ -179,8 +179,8 @@ public:
 int output_freq = 1;
 bool is_provide_training_metric = false;
 int num_iterations = 10;
-float learning_rate = 0.1f;
-float bagging_fraction = 1.0f;
+double learning_rate = 0.1f;
+double bagging_fraction = 1.0f;
 int bagging_seed = 3;
 int bagging_freq = 0;
 int early_stopping_round = 0;
@@ -268,12 +268,12 @@ inline bool ConfigBase::GetInt(
 return false;
 }
-inline bool ConfigBase::GetFloat(
+inline bool ConfigBase::GetDouble(
 const std::unordered_map<std::string, std::string>& params,
-const std::string& name, float* out) {
+const std::string& name, double* out) {
 if (params.count(name) > 0) {
 if (!Common::AtofAndCheck(params.at(name).c_str(), out)) {
-Log::Fatal("Parameter %s should be float type, passed is [%s]",
+Log::Fatal("Parameter %s should be double type, passed is [%s]",
 name.c_str(), params.at(name).c_str());
 }
 return true;
......
@@ -227,7 +227,7 @@ public:
 * \param out_label Label will store to this if exists
 */
 virtual void ParseOneLine(const char* str,
-std::vector<std::pair<int, float>>* out_features, float* out_label) const = 0;
+std::vector<std::pair<int, double>>* out_features, double* out_label) const = 0;
 /*!
 * \brief Create a object of parser, will auto choose the format depend on file
@@ -240,7 +240,7 @@ public:
 };
 using PredictFunction =
-std::function<float(const std::vector<std::pair<int, float>>&)>;
+std::function<double(const std::vector<std::pair<int, double>>&)>;
 /*! \brief The main class of data set,
 * which are used to traning or validation
......
@@ -71,7 +71,7 @@ public:
 * \param idx Index of record
 * \param value feature value of record
 */
-inline void PushData(int tid, data_size_t line_idx, float value) {
+inline void PushData(int tid, data_size_t line_idx, double value) {
 unsigned int bin = bin_mapper_->ValueToBin(value);
 bin_data_->Push(tid, line_idx, bin);
 }
@@ -89,7 +89,7 @@ public:
 * \param bin
 * \return Feature value of this bin
 */
-inline float BinToValue(unsigned int bin)
+inline double BinToValue(unsigned int bin)
 const { return bin_mapper_->BinToValue(bin); }
 /*!
......
@@ -34,7 +34,7 @@ public:
 * \brief Calcaluting and printing metric result
 * \param score Current prediction score
 */
-virtual std::vector<float> Eval(const score_t* score) const = 0;
+virtual std::vector<double> Eval(const score_t* score) const = 0;
 /*!
 * \brief Create object of metrics
@@ -54,7 +54,7 @@ public:
 * \brief Initial logic
 * \param label_gain Gain for labels, default is 2^i - 1
 */
-static void Init(std::vector<float> label_gain);
+static void Init(std::vector<double> label_gain);
 /*!
 * \brief Calculate the DCG score at position k
@@ -64,7 +64,7 @@ public:
 * \param num_data Number of data
 * \return The DCG score
 */
-static float CalDCGAtK(data_size_t k, const float* label,
+static score_t CalDCGAtK(data_size_t k, const float* label,
 const score_t* score, data_size_t num_data);
 /*!
@@ -77,7 +77,7 @@ public:
 */
 static void CalDCG(const std::vector<data_size_t>& ks,
 const float* label, const score_t* score,
-data_size_t num_data, std::vector<float>* out);
+data_size_t num_data, std::vector<score_t>* out);
 /*!
 * \brief Calculate the Max DCG score at position k
@@ -86,7 +86,7 @@ public:
 * \param num_data Number of data
 * \return The max DCG score
 */
-static float CalMaxDCGAtK(data_size_t k,
+static score_t CalMaxDCGAtK(data_size_t k,
 const float* label, data_size_t num_data);
 /*!
@@ -97,22 +97,22 @@ public:
 * \param out Output result
 */
 static void CalMaxDCG(const std::vector<data_size_t>& ks,
-const float* label, data_size_t num_data, std::vector<float>* out);
+const float* label, data_size_t num_data, std::vector<score_t>* out);
 /*!
 * \brief Get discount score of position k
 * \param k The position
 * \return The discount of this position
 */
-inline static float GetDiscount(data_size_t k) { return discount_[k]; }
+inline static score_t GetDiscount(data_size_t k) { return discount_[k]; }
 private:
 /*! \brief True if inited, avoid init multi times */
 static bool is_inited_;
 /*! \brief store gains for different label */
-static std::vector<float> label_gain_;
+static std::vector<score_t> label_gain_;
 /*! \brief store discount score for different position */
-static std::vector<float> discount_;
+static std::vector<score_t> discount_;
 /*! \brief max position for eval */
 static const data_size_t kMaxPosition;
 };
......
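Note (not part of this commit's diff): a hedged sketch of the DCG@k quantity the declarations above describe. The gains follow the documented default 2^label - 1; the 1/log2(position + 2) discount is the conventional NDCG discount and is an assumption here, since the diff does not show how discount_ is filled.

#include <cmath>
#include <vector>

// DCG@k over labels already sorted by predicted score (best-scored first).
double DCGAtKSketch(int k, const std::vector<int>& sorted_labels,
                    const std::vector<double>& label_gain) {
  double dcg = 0.0;
  int n = static_cast<int>(sorted_labels.size());
  for (int i = 0; i < k && i < n; ++i) {
    double gain = label_gain[sorted_labels[i]];   // default gain: 2^label - 1
    double discount = 1.0 / std::log2(i + 2.0);   // assumed conventional discount
    dcg += gain * discount;
  }
  return dcg;
}
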
@@ -36,7 +36,7 @@ public:
 * This function is used for prediction task, if has sigmoid param, the prediction value will be transform by sigmoid function.
 * \return Sigmoid param, if <=0.0 means don't use sigmoid transform on this objective.
 */
-virtual float GetSigmoid() const = 0;
+virtual score_t GetSigmoid() const = 0;
 /*!
 * \brief Create object of objective function
......
@@ -36,18 +36,18 @@ public:
 * \param feature Index of feature; the converted index after removing useless features
 * \param threshold Threshold(bin) of split
 * \param real_feature Index of feature, the original index on data
-* \param threshold_float Threshold on feature value
+* \param threshold_double Threshold on feature value
 * \param left_value Model Left child output
 * \param right_value Model Right child output
 * \param gain Split gain
 * \return The index of new leaf.
 */
 int Split(int leaf, int feature, unsigned int threshold, int real_feature,
-float threshold_float, float left_value,
-float right_value, float gain);
+double threshold_double, double left_value,
+double right_value, double gain);
 /*! \brief Get the output of one leave */
-inline float LeafOutput(int leaf) const { return leaf_value_[leaf]; }
+inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
 /*!
 * \brief Adding prediction value of this tree model to scores
@@ -74,8 +74,8 @@ public:
 * \param feature_values Feature value of this record
 * \return Prediction result
 */
-inline float Predict(const float* feature_values) const;
-inline int PredictLeafIndex(const float* feature_values) const;
+inline double Predict(const double* feature_values) const;
+inline int PredictLeafIndex(const double* feature_values) const;
 /*! \brief Get Number of leaves*/
 inline int num_leaves() const { return num_leaves_; }
@@ -91,7 +91,7 @@ public:
 * shrinkage rate (a.k.a learning rate) is used to tune the traning process
 * \param rate The factor of shrinkage
 */
-inline void Shrinkage(float rate) {
+inline void Shrinkage(double rate) {
 for (int i = 0; i < num_leaves_; ++i) {
 leaf_value_[i] = leaf_value_[i] * rate;
 }
@@ -119,7 +119,7 @@ private:
 * \param feature_values Feature value of this record
 * \return Leaf index
 */
-inline int GetLeaf(const float* feature_values) const;
+inline int GetLeaf(const double* feature_values) const;
 /*! \brief Number of max leaves*/
 int max_leaves_;
@@ -137,25 +137,25 @@ private:
 /*! \brief A non-leaf node's split threshold in bin */
 unsigned int* threshold_in_bin_;
 /*! \brief A non-leaf node's split threshold in feature value */
-float* threshold_;
+double* threshold_;
 /*! \brief A non-leaf node's split gain */
-float* split_gain_;
+double* split_gain_;
 // used for leaf node
 /*! \brief The parent of leaf */
 int* leaf_parent_;
 /*! \brief Output of leaves */
-float* leaf_value_;
+double* leaf_value_;
 /*! \brief Depth for leaves */
 int* leaf_depth_;
 };
-inline float Tree::Predict(const float* feature_values) const {
+inline double Tree::Predict(const double* feature_values) const {
 int leaf = GetLeaf(feature_values);
 return LeafOutput(leaf);
 }
-inline int Tree::PredictLeafIndex(const float* feature_values) const {
+inline int Tree::PredictLeafIndex(const double* feature_values) const {
 int leaf = GetLeaf(feature_values);
 return leaf;
 }
@@ -174,7 +174,7 @@ inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
 return ~node;
 }
-inline int Tree::GetLeaf(const float* feature_values) const {
+inline int Tree::GetLeaf(const double* feature_values) const {
 int node = 0;
 while (node >= 0) {
 if (feature_values[split_feature_real_[node]] <= threshold_[node]) {
......
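Note (not part of this commit's diff): the GetLeaf hunk above is cut off mid-loop. A hedged, standalone sketch of the traversal it starts follows; the left_child / right_child arrays are assumed names not shown in this commit, while the ~node decoding follows the `return ~node;` visible in the other GetLeaf overload above.

// Walk from the root: internal nodes have indices >= 0, leaves are stored as
// bitwise-complemented indices, so the loop ends once node goes negative.
inline int GetLeafSketch(const double* feature_values,
                         const int* split_feature_real, const double* threshold,
                         const int* left_child, const int* right_child) {
  int node = 0;
  while (node >= 0) {
    if (feature_values[split_feature_real[node]] <= threshold[node]) {
      node = left_child[node];   // go to left child
    } else {
      node = right_child[node];  // go to right child
    }
  }
  return ~node;  // decode leaf index
}
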
@@ -102,10 +102,9 @@ inline static const char* Atoi(const char* p, int* out) {
 return p;
 }
-//ref to http://www.leapsecond.com/tools/fast_atof.c
-inline static const char* Atof(const char* p, float* out) {
+inline static const char* Atof(const char* p, double* out) {
 int frac;
-float sign, value, scale;
+double sign, value, scale;
 *out = 0;
 // Skip leading white space, if any.
 while (*p == ' ') {
@@ -113,9 +112,9 @@ inline static const char* Atof(const char* p, float* out) {
 }
 // Get sign, if any.
-sign = 1.0f;
+sign = 1.0;
 if (*p == '-') {
-sign = -1.0f;
+sign = -1.0;
 ++p;
 } else if (*p == '+') {
 ++p;
@@ -124,24 +123,24 @@ inline static const char* Atof(const char* p, float* out) {
 // is a number
 if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
 // Get digits before decimal point or exponent, if any.
-for (value = 0.0f; *p >= '0' && *p <= '9'; ++p) {
-value = value * 10.0f + (*p - '0');
+for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
+value = value * 10.0 + (*p - '0');
 }
 // Get digits after decimal point, if any.
 if (*p == '.') {
-float pow10 = 10.0f;
+double pow10 = 10.0;
 ++p;
 while (*p >= '0' && *p <= '9') {
 value += (*p - '0') / pow10;
-pow10 *= 10.0f;
+pow10 *= 10.0;
 ++p;
 }
 }
 // Handle exponent, if any.
 frac = 0;
-scale = 1.0f;
+scale = 1.0;
 if ((*p == 'e') || (*p == 'E')) {
 unsigned int expon;
 // Get sign of exponent, if any.
@@ -156,9 +155,11 @@ inline static const char* Atof(const char* p, float* out) {
 for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
 expon = expon * 10 + (*p - '0');
 }
-if (expon > 38) expon = 38;
+if (expon > 308) expon = 308;
+// Calculate scaling factor.
+while (expon >= 50) { scale *= 1E50; expon -= 50; }
 while (expon >= 8) { scale *= 1E8; expon -= 8; }
-while (expon > 0) { scale *= 10.0f; expon -= 1; }
+while (expon > 0) { scale *= 10.0; expon -= 1; }
 }
 // Return signed and scaled floating point result.
 *out = sign * (frac ? (value / scale) : (value * scale));
@@ -174,9 +175,9 @@ inline static const char* Atof(const char* p, float* out) {
 std::string tmp_str(p, cnt);
 std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
 if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
-*out = 0.0f;
+*out = 0;
 } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
-*out = sign * static_cast<float>(1e38);
+*out = sign * 1e308;
 } else {
 Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
 }
@@ -191,6 +192,8 @@ inline static const char* Atof(const char* p, float* out) {
 return p;
 }
 inline bool AtoiAndCheck(const char* p, int* out) {
 const char* after = Atoi(p, out);
 if (*after != '\0') {
@@ -199,7 +202,7 @@ inline bool AtoiAndCheck(const char* p, int* out) {
 return true;
 }
-inline bool AtofAndCheck(const char* p, float* out) {
+inline bool AtofAndCheck(const char* p, double* out) {
 const char* after = Atof(p, out);
 if (*after != '\0') {
 return false;
@@ -260,10 +263,11 @@ inline static void StringToIntArray(const std::string& str, char delimiter, size
 }
 }
-inline static void StringToFloatArray(const std::string& str, char delimiter, size_t n, float* out) {
+inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, double* out) {
 std::vector<std::string> strs = Split(str.c_str(), delimiter);
 if (strs.size() != n) {
-Log::Fatal("StringToFloatArray error, size doesn't matched.");
+Log::Fatal("StringToDoubleArray error, size doesn't matched.");
 }
 for (size_t i = 0; i < strs.size(); ++i) {
 strs[i] = Trim(strs[i]);
@@ -271,12 +275,12 @@ inline static void StringToFloatArray(const std::string& str, char delimiter, si
 }
 }
-inline static std::vector<float> StringToFloatArray(const std::string& str, char delimiter) {
+inline static std::vector<double> StringToDoubleArray(const std::string& str, char delimiter) {
 std::vector<std::string> strs = Split(str.c_str(), delimiter);
-std::vector<float> ret;
+std::vector<double> ret;
 for (size_t i = 0; i < strs.size(); ++i) {
 strs[i] = Trim(strs[i]);
-float val = 0.0f;
+double val = 0.0f;
 Atof(strs[i].c_str(), &val);
 ret.push_back(val);
 }
@@ -338,19 +342,19 @@ static inline int64_t Pow2RoundUp(int64_t x) {
 * \brief Do inplace softmax transformaton on p_rec
 * \param p_rec The input/output vector of the values.
 */
-inline void Softmax(std::vector<float>* p_rec) {
-std::vector<float> &rec = *p_rec;
-float wmax = rec[0];
+inline void Softmax(std::vector<double>* p_rec) {
+std::vector<double> &rec = *p_rec;
+double wmax = rec[0];
 for (size_t i = 1; i < rec.size(); ++i) {
 wmax = std::max(rec[i], wmax);
 }
-float wsum = 0.0f;
+double wsum = 0.0f;
 for (size_t i = 0; i < rec.size(); ++i) {
 rec[i] = std::exp(rec[i] - wmax);
 wsum += rec[i];
 }
 for (size_t i = 0; i < rec.size(); ++i) {
-rec[i] /= static_cast<float>(wsum);
+rec[i] /= static_cast<double>(wsum);
 }
 }
......
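Note (not part of this commit's diff): the exponent cap in Atof moves from 38 to 308 because float overflows near 1e38 while double overflows near 1e308, and the new 1E50 step lets the loop build such large powers of ten in a few multiplications. A self-contained sketch of just that scaling step, mirroring the loop in the hunk above (the function name and the printed demo are illustrative only):

#include <cstdio>

// Builds 10^expon the way the patched loop above does: jumps of 1E50, then 1E8,
// then single decades.
double ScaleFromExponent(unsigned int expon) {
  double scale = 1.0;
  if (expon > 308) expon = 308;                          // double overflows past ~1e308
  while (expon >= 50) { scale *= 1E50; expon -= 50; }
  while (expon >= 8)  { scale *= 1E8;  expon -= 8;  }
  while (expon > 0)   { scale *= 10.0; expon -= 1;  }
  return scale;
}

int main() {
  std::printf("%g %g\n", ScaleFromExponent(3), ScaleFromExponent(300));  // prints: 1000 1e+300
  return 0;
}
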
@@ -125,7 +125,7 @@ void Application::LoadData() {
 if (boosting_->NumberOfSubModels() > 0) {
 predictor = new Predictor(boosting_, config_.io_config.is_sigmoid, config_.predict_leaf_index, -1);
 predict_fun =
-[&predictor](const std::vector<std::pair<int, float>>& features) {
+[&predictor](const std::vector<std::pair<int, double>>& features) {
 return predictor->PredictRawOneLine(features);
 };
 }
@@ -210,7 +210,7 @@ void Application::InitTrain() {
 gbdt_config->tree_config.feature_fraction_seed =
 GlobalSyncUpByMin<int>(gbdt_config->tree_config.feature_fraction_seed);
 gbdt_config->tree_config.feature_fraction =
-GlobalSyncUpByMin<float>(gbdt_config->tree_config.feature_fraction);
+GlobalSyncUpByMin<double>(gbdt_config->tree_config.feature_fraction);
 }
 }
 // create boosting
......
@@ -39,9 +39,9 @@ public:
 {
 num_threads_ = omp_get_num_threads();
 }
-features_ = new float*[num_threads_];
+features_ = new double*[num_threads_];
 for (int i = 0; i < num_threads_; ++i) {
-features_[i] = new float[num_features_];
+features_[i] = new double[num_features_];
 }
 }
 /*!
@@ -61,7 +61,7 @@ public:
 * \param features Feature for this record
 * \return Prediction result
 */
-float PredictRawOneLine(const std::vector<std::pair<int, float>>& features) {
+double PredictRawOneLine(const std::vector<std::pair<int, double>>& features) {
 const int tid = PutFeatureValuesToBuffer(features);
 // get result without sigmoid transformation
 return boosting_->PredictRaw(features_[tid], num_used_model_);
@@ -72,7 +72,7 @@ public:
 * \param features Feature for this record
 * \return Predictied leaf index
 */
-std::vector<int> PredictLeafIndexOneLine(const std::vector<std::pair<int, float>>& features) {
+std::vector<int> PredictLeafIndexOneLine(const std::vector<std::pair<int, double>>& features) {
 const int tid = PutFeatureValuesToBuffer(features);
 // get result for leaf index
 return boosting_->PredictLeafIndex(features_[tid], num_used_model_);
@@ -83,7 +83,7 @@ public:
 * \param features Feature of this record
 * \return Prediction result
 */
-float PredictOneLine(const std::vector<std::pair<int, float>>& features) {
+double PredictOneLine(const std::vector<std::pair<int, double>>& features) {
 const int tid = PutFeatureValuesToBuffer(features);
 // get result with sigmoid transform if needed
 return boosting_->Predict(features_[tid], num_used_model_);
@@ -94,7 +94,7 @@ public:
 * \param features Feature of this record
 * \return Prediction result
 */
-std::vector<float> PredictMulticlassOneLine(const std::vector<std::pair<int, float>>& features) {
+std::vector<double> PredictMulticlassOneLine(const std::vector<std::pair<int, double>>& features) {
 const int tid = PutFeatureValuesToBuffer(features);
 // get result with sigmoid transform if needed
 return boosting_->PredictMulticlass(features_[tid], num_used_model_);
@@ -125,17 +125,17 @@ public:
 }
 // function for parse data
-std::function<void(const char*, std::vector<std::pair<int, float>>*)> parser_fun;
-float tmp_label;
+std::function<void(const char*, std::vector<std::pair<int, double>>*)> parser_fun;
+double tmp_label;
 parser_fun = [this, &parser, &tmp_label]
-(const char* buffer, std::vector<std::pair<int, float>>* feature) {
+(const char* buffer, std::vector<std::pair<int, double>>* feature) {
 parser->ParseOneLine(buffer, feature, &tmp_label);
 };
-std::function<std::string(const std::vector<std::pair<int, float>>&)> predict_fun;
+std::function<std::string(const std::vector<std::pair<int, double>>&)> predict_fun;
 if (num_class_ > 1) {
-predict_fun = [this](const std::vector<std::pair<int, float>>& features){
-std::vector<float> prediction = PredictMulticlassOneLine(features);
+predict_fun = [this](const std::vector<std::pair<int, double>>& features){
+std::vector<double> prediction = PredictMulticlassOneLine(features);
 std::stringstream result_stream_buf;
 for (size_t i = 0; i < prediction.size(); ++i){
 if (i > 0) {
@@ -147,7 +147,7 @@ public:
 };
 }
 else if (is_predict_leaf_index_) {
-predict_fun = [this](const std::vector<std::pair<int, float>>& features){
+predict_fun = [this](const std::vector<std::pair<int, double>>& features){
 std::vector<int> predicted_leaf_index = PredictLeafIndexOneLine(features);
 std::stringstream result_stream_buf;
 for (size_t i = 0; i < predicted_leaf_index.size(); ++i){
@@ -161,12 +161,12 @@ public:
 }
 else {
 if (is_simgoid_) {
-predict_fun = [this](const std::vector<std::pair<int, float>>& features){
+predict_fun = [this](const std::vector<std::pair<int, double>>& features){
 return std::to_string(PredictOneLine(features));
 };
 }
 else {
-predict_fun = [this](const std::vector<std::pair<int, float>>& features){
+predict_fun = [this](const std::vector<std::pair<int, double>>& features){
 return std::to_string(PredictRawOneLine(features));
 };
 }
@@ -174,7 +174,7 @@ public:
 std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
 [this, &parser_fun, &predict_fun, &result_file]
 (data_size_t, const std::vector<std::string>& lines) {
-std::vector<std::pair<int, float>> oneline_features;
+std::vector<std::pair<int, double>> oneline_features;
 std::vector<std::string> pred_result(lines.size(), "");
 #pragma omp parallel for schedule(static) private(oneline_features)
 for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
@@ -197,10 +197,10 @@ public:
 }
 private:
-int PutFeatureValuesToBuffer(const std::vector<std::pair<int, float>>& features) {
+int PutFeatureValuesToBuffer(const std::vector<std::pair<int, double>>& features) {
 int tid = omp_get_thread_num();
 // init feature value
-std::memset(features_[tid], 0, sizeof(float)*num_features_);
+std::memset(features_[tid], 0, sizeof(double)*num_features_);
 // put feature value
 for (const auto& p : features) {
 if (p.first < num_features_) {
@@ -212,7 +212,7 @@ private:
 /*! \brief Boosting model */
 const Boosting* boosting_;
 /*! \brief Buffer for feature values */
-float** features_;
+double** features_;
 /*! \brief Number of features */
 int num_features_;
 /*! \brief Number of classes */
......
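Note (not part of this commit's diff): the predictor above takes sparse (feature index, value) pairs and scatters them into a dense per-thread buffer before calling the boosting model. A hedged standalone sketch of that scatter step, with local parameters instead of the class's features_ buffers (the last assignment is the obvious continuation of the truncated loop above, not a verbatim copy):

#include <cstring>
#include <utility>
#include <vector>

// Scatter sparse (index, value) pairs into a dense array; unset features stay 0.
void FillDenseFeatures(const std::vector<std::pair<int, double>>& features,
                       double* buffer, int num_features) {
  std::memset(buffer, 0, sizeof(double) * num_features);
  for (const auto& p : features) {
    if (p.first < num_features) {
      buffer[p.first] = p.second;
    }
  }
}
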
@@ -229,7 +229,7 @@ bool GBDT::OutputMetric(int iter) {
 for (auto& sub_metric : training_metrics_) {
 auto name = sub_metric->GetName();
 auto scores = sub_metric->Eval(train_score_updater_->score());
-Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<float>(scores, ' ').c_str());
+Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(scores, ' ').c_str());
 }
 }
 // print validation metric
@@ -239,7 +239,7 @@ bool GBDT::OutputMetric(int iter) {
 auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
 if ((iter % gbdt_config_->output_freq) == 0) {
 auto name = valid_metrics_[i][j]->GetName();
-Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<float>(test_scores, ' ').c_str());
+Log::Info("Iteration:%d, %s : %s", iter, name, Common::ArrayToString<double>(test_scores, ' ').c_str());
 }
 if (!ret && early_stopping_round_ > 0) {
 bool the_bigger_the_better = valid_metrics_[i][j]->is_bigger_better();
@@ -266,7 +266,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
 auto name = sub_metric->GetName();
 auto scores = sub_metric->Eval(train_score_updater_->score());
 std::stringstream str_buf;
-str_buf << name << " : " << Common::ArrayToString<float>(scores, ' ');
+str_buf << name << " : " << Common::ArrayToString<double>(scores, ' ');
 ret.emplace_back(str_buf.str());
 }
 }
@@ -276,7 +276,7 @@ std::vector<std::string> GBDT::EvalCurrent(bool is_eval_train) const {
 auto name = valid_metrics_[i][j]->GetName();
 auto test_scores = valid_metrics_[i][j]->Eval(valid_score_updater_[i]->score());
 std::stringstream str_buf;
-str_buf << name << " : " << Common::ArrayToString<float>(test_scores, ' ');
+str_buf << name << " : " << Common::ArrayToString<double>(test_scores, ' ');
 ret.emplace_back(str_buf.str());
 }
 }
@@ -420,7 +420,7 @@ void GBDT::ModelsFromString(const std::string& model_str) {
 }
 // if sigmoid doesn't exists
 if (i == lines.size()) {
-sigmoid_ = -1.0;
+sigmoid_ = -1.0f;
 }
 // get tree models
 i = 0;
@@ -467,22 +467,22 @@ std::string GBDT::FeatureImportance() const {
 return str_buf.str();
 }
-float GBDT::PredictRaw(const float* value, int num_used_model) const {
+double GBDT::PredictRaw(const double* value, int num_used_model) const {
 if (num_used_model < 0) {
 num_used_model = static_cast<int>(models_.size());
 }
-float ret = 0.0f;
+double ret = 0.0f;
 for (int i = 0; i < num_used_model; ++i) {
 ret += models_[i]->Predict(value);
 }
 return ret;
 }
-float GBDT::Predict(const float* value, int num_used_model) const {
+double GBDT::Predict(const double* value, int num_used_model) const {
 if (num_used_model < 0) {
 num_used_model = static_cast<int>(models_.size());
 }
-float ret = 0.0f;
+double ret = 0.0f;
 for (int i = 0; i < num_used_model; ++i) {
 ret += models_[i]->Predict(value);
 }
@@ -493,11 +493,11 @@ float GBDT::Predict(const float* value, int num_used_model) const {
 return ret;
 }
-std::vector<float> GBDT::PredictMulticlass(const float* value, int num_used_model) const {
+std::vector<double> GBDT::PredictMulticlass(const double* value, int num_used_model) const {
 if (num_used_model < 0) {
 num_used_model = static_cast<int>(models_.size()) / num_class_;
 }
-std::vector<float> ret(num_class_, 0.0f);
+std::vector<double> ret(num_class_, 0.0f);
 for (int i = 0; i < num_used_model; ++i) {
 for (int j = 0; j < num_class_; ++j){
 ret[j] += models_[i * num_class_ + j] -> Predict(value);
@@ -507,7 +507,7 @@ std::vector<float> GBDT::PredictMulticlass(const float* value, int num_used_mode
 return ret;
 }
-std::vector<int> GBDT::PredictLeafIndex(const float* value, int num_used_model) const {
+std::vector<int> GBDT::PredictLeafIndex(const double* value, int num_used_model) const {
 if (num_used_model < 0) {
 num_used_model = static_cast<int>(models_.size());
 }
......
@@ -58,7 +58,7 @@ public:
 * \param num_used_model Number of used model
 * \return Prediction result for this record
 */
-float PredictRaw(const float* feature_values, int num_used_model) const override;
+double PredictRaw(const double* feature_values, int num_used_model) const override;
 /*!
 * \brief Predtion for one record with sigmoid transformation if enabled
@@ -66,14 +66,14 @@ public:
 * \param num_used_model Number of used model
 * \return Prediction result for this record
 */
-float Predict(const float* feature_values, int num_used_model) const override;
+double Predict(const double* feature_values, int num_used_model) const override;
 /*!
 * \brief Predtion for multiclass classification
 * \param feature_values Feature value on this record
 * \return Prediction result, num_class numbers per line
 */
-std::vector<float> PredictMulticlass(const float* value, int num_used_model) const override;
+std::vector<double> PredictMulticlass(const double* value, int num_used_model) const override;
 /*!
 * \brief Predtion for one record with leaf index
@@ -81,7 +81,7 @@ public:
 * \param num_used_model Number of used model
 * \return Predicted leaf index for this record
 */
-std::vector<int> PredictLeafIndex(const float* value, int num_used_model) const override;
+std::vector<int> PredictLeafIndex(const double* value, int num_used_model) const override;
 /*!
 * \brief Serialize models by string
@@ -177,7 +177,7 @@ private:
 int early_stopping_round_;
 /*! \brief Best score(s) for early stopping */
 std::vector<std::vector<int>> best_iter_;
-std::vector<std::vector<score_t>> best_score_;
+std::vector<std::vector<double>> best_score_;
 /*! \brief Trained models(trees) */
 std::vector<Tree*> models_;
 /*! \brief Max feature index of training data*/
@@ -204,7 +204,7 @@ private:
 * \brief Sigmoid parameter, used for prediction.
 * if > 0 meas output score will transform by sigmoid function
 */
-float sigmoid_;
+double sigmoid_;
 /*! \brief Index of label column */
 data_size_t label_idx_;
 /*! \brief Saved number of models */
......
@@ -24,7 +24,7 @@ BinMapper::BinMapper(const BinMapper& other)
 num_bin_ = other.num_bin_;
 is_trival_ = other.is_trival_;
 sparse_rate_ = other.sparse_rate_;
-bin_upper_bound_ = new float[num_bin_];
+bin_upper_bound_ = new double[num_bin_];
 for (int i = 0; i < num_bin_; ++i) {
 bin_upper_bound_[i] = other.bin_upper_bound_[i];
 }
@@ -39,11 +39,11 @@ BinMapper::~BinMapper() {
 delete[] bin_upper_bound_;
 }
-void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
-std::vector<float>& ref_values = (*values);
+void BinMapper::FindBin(std::vector<double>* values, int max_bin) {
+std::vector<double>& ref_values = (*values);
 size_t sample_size = values->size();
 // find distinct_values first
-std::vector<float> distinct_values;
+std::vector<double> distinct_values;
 std::vector<int> counts;
 std::sort(ref_values.begin(), ref_values.end());
@@ -63,21 +63,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
 if (num_values <= max_bin) {
 // use distinct value is enough
 num_bin_ = num_values;
-bin_upper_bound_ = new float[num_values];
+bin_upper_bound_ = new double[num_values];
 for (int i = 0; i < num_values - 1; ++i) {
 bin_upper_bound_[i] = (distinct_values[i] + distinct_values[i + 1]) / 2;
 }
 cnt_in_bin0 = counts[0];
-bin_upper_bound_[num_values - 1] = std::numeric_limits<float>::infinity();
+bin_upper_bound_[num_values - 1] = std::numeric_limits<double>::infinity();
 } else {
 // mean size for one bin
-float mean_bin_size = sample_size / static_cast<float>(max_bin);
+double mean_bin_size = sample_size / static_cast<double>(max_bin);
 int rest_sample_cnt = static_cast<int>(sample_size);
 int bin_cnt = 0;
 num_bin_ = max_bin;
-std::vector<float> upper_bounds(max_bin, std::numeric_limits<float>::infinity());
-std::vector<float> lower_bounds(max_bin, std::numeric_limits<float>::infinity());
+std::vector<double> upper_bounds(max_bin, std::numeric_limits<double>::infinity());
+std::vector<double> lower_bounds(max_bin, std::numeric_limits<double>::infinity());
 // sort by count, descent
 Common::SortForPair(counts, distinct_values, 0, true);
 // fetch big slot as unique bin
@@ -90,8 +90,8 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
 // process reminder bins
 if (bin_cnt < max_bin) {
 // sort rest by values
-Common::SortForPair<float, int>(distinct_values, counts, bin_cnt, false);
-mean_bin_size = rest_sample_cnt / static_cast<float>(max_bin - bin_cnt);
+Common::SortForPair<double, int>(distinct_values, counts, bin_cnt, false);
+mean_bin_size = rest_sample_cnt / static_cast<double>(max_bin - bin_cnt);
 lower_bounds[bin_cnt] = distinct_values[bin_cnt];
 int cur_cnt_inbin = 0;
 for (int i = bin_cnt; i < num_values - 1; ++i) {
@@ -105,21 +105,21 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
 lower_bounds[bin_cnt] = distinct_values[i + 1];
 if (bin_cnt >= max_bin - 1) break;
 cur_cnt_inbin = 0;
-mean_bin_size = rest_sample_cnt / static_cast<float>(max_bin - bin_cnt);
+mean_bin_size = rest_sample_cnt / static_cast<double>(max_bin - bin_cnt);
 }
 }
 cur_cnt_inbin += counts[num_values - 1];
 }
-Common::SortForPair<float, float>(lower_bounds, upper_bounds, 0, false);
+Common::SortForPair<double, double>(lower_bounds, upper_bounds, 0, false);
 // update bin upper bound
-bin_upper_bound_ = new float[bin_cnt];
+bin_upper_bound_ = new double[bin_cnt];
 num_bin_ = bin_cnt;
 for (int i = 0; i < bin_cnt - 1; ++i) {
 bin_upper_bound_[i] = (upper_bounds[i] + lower_bounds[i + 1]) / 2.0f;
 }
 // last bin upper bound
-bin_upper_bound_[bin_cnt - 1] = std::numeric_limits<float>::infinity();
+bin_upper_bound_[bin_cnt - 1] = std::numeric_limits<double>::infinity();
 }
 // check trival(num_bin_ == 1) feature
 if (num_bin_ <= 1) {
@@ -128,7 +128,7 @@ void BinMapper::FindBin(std::vector<float>* values, int max_bin) {
 is_trival_ = false;
 }
 // calculate sparse rate
-sparse_rate_ = static_cast<float>(cnt_in_bin0) / static_cast<float>(sample_size);
+sparse_rate_ = static_cast<double>(cnt_in_bin0) / static_cast<double>(sample_size);
 }
@@ -136,8 +136,8 @@ int BinMapper::SizeForSpecificBin(int bin) {
 int size = 0;
 size += sizeof(int);
 size += sizeof(bool);
-size += sizeof(float);
-size += bin * sizeof(float);
+size += sizeof(double);
+size += bin * sizeof(double);
 return size;
 }
@@ -148,7 +148,7 @@ void BinMapper::CopyTo(char * buffer) {
 buffer += sizeof(is_trival_);
 std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_));
 buffer += sizeof(sparse_rate_);
-std::memcpy(buffer, bin_upper_bound_, num_bin_ * sizeof(float));
+std::memcpy(buffer, bin_upper_bound_, num_bin_ * sizeof(double));
 }
 void BinMapper::CopyFrom(const char * buffer) {
@@ -159,19 +159,19 @@ void BinMapper::CopyFrom(const char * buffer) {
 std::memcpy(&sparse_rate_, buffer, sizeof(sparse_rate_));
 buffer += sizeof(sparse_rate_);
 if (bin_upper_bound_ != nullptr) { delete[] bin_upper_bound_; }
-bin_upper_bound_ = new float[num_bin_];
-std::memcpy(bin_upper_bound_, buffer, num_bin_ * sizeof(float));
+bin_upper_bound_ = new double[num_bin_];
+std::memcpy(bin_upper_bound_, buffer, num_bin_ * sizeof(double));
 }
 void BinMapper::SaveBinaryToFile(FILE* file) const {
 fwrite(&num_bin_, sizeof(num_bin_), 1, file);
 fwrite(&is_trival_, sizeof(is_trival_), 1, file);
 fwrite(&sparse_rate_, sizeof(sparse_rate_), 1, file);
-fwrite(bin_upper_bound_, sizeof(float), num_bin_, file);
+fwrite(bin_upper_bound_, sizeof(double), num_bin_, file);
 }
 size_t BinMapper::SizesInByte() const {
-return sizeof(num_bin_) + sizeof(is_trival_) + sizeof(sparse_rate_) + sizeof(float) * num_bin_;
+return sizeof(num_bin_) + sizeof(is_trival_) + sizeof(sparse_rate_) + sizeof(double) * num_bin_;
 }
 template class DenseBin<uint8_t>;
@@ -187,9 +187,9 @@ template class OrderedSparseBin<uint16_t>;
 template class OrderedSparseBin<uint32_t>;
-Bin* Bin::CreateBin(data_size_t num_data, int num_bin, float sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
+Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
 // sparse threshold
-const float kSparseThreshold = 0.8f;
+const double kSparseThreshold = 0.8f;
 if (sparse_rate >= kSparseThreshold && is_enable_sparse) {
 *is_sparse = true;
 return CreateSparseBin(num_data, num_bin, default_bin);
......
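Note (not part of this commit's diff): FindBin above computes sparse_rate as cnt_in_bin0 / sample_size, and CreateBin compares it against the 0.8 threshold to pick sparse or dense storage. A tiny self-contained worked example of that arithmetic (the values and the printed demo are illustrative; only kSparseThreshold mirrors the diff):

#include <cstdio>

int main() {
  const double kSparseThreshold = 0.8;
  int cnt_in_bin0 = 9000;    // samples falling into the dominant ("zero") bin
  int sample_size = 10000;   // sampled values for this feature
  double sparse_rate = static_cast<double>(cnt_in_bin0) / static_cast<double>(sample_size);
  bool use_sparse_bin = (sparse_rate >= kSparseThreshold);  // 0.9 >= 0.8 -> sparse storage
  std::printf("sparse_rate=%.2f sparse=%d\n", sparse_rate, use_sparse_bin);
  return 0;
}
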
@@ -216,38 +216,38 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
 void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
 GetBool(params, "is_unbalance", &is_unbalance);
-GetFloat(params, "sigmoid", &sigmoid);
+GetDouble(params, "sigmoid", &sigmoid);
 GetInt(params, "max_position", &max_position);
 CHECK(max_position > 0);
 GetInt(params, "num_class", &num_class);
 CHECK(num_class >= 1);
 std::string tmp_str = "";
 if (GetString(params, "label_gain", &tmp_str)) {
-label_gain = Common::StringToFloatArray(tmp_str, ',');
+label_gain = Common::StringToDoubleArray(tmp_str, ',');
 } else {
 // label_gain = 2^i - 1, may overflow, so we use 31 here
 const int max_label = 31;
 label_gain.push_back(0.0f);
 for (int i = 1; i < max_label; ++i) {
-label_gain.push_back(static_cast<float>((1 << i) - 1));
+label_gain.push_back(static_cast<double>((1 << i) - 1));
 }
 }
 }
 void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
-GetFloat(params, "sigmoid", &sigmoid);
+GetDouble(params, "sigmoid", &sigmoid);
 GetInt(params, "num_class", &num_class);
 CHECK(num_class >= 1);
 std::string tmp_str = "";
 if (GetString(params, "label_gain", &tmp_str)) {
-label_gain = Common::StringToFloatArray(tmp_str, ',');
+label_gain = Common::StringToDoubleArray(tmp_str, ',');
 } else {
 // label_gain = 2^i - 1, may overflow, so we use 31 here
 const int max_label = 31;
 label_gain.push_back(0.0f);
 for (int i = 1; i < max_label; ++i) {
-label_gain.push_back(static_cast<float>((1 << i) - 1));
+label_gain.push_back(static_cast<double>((1 << i) - 1));
 }
 }
 if (GetString(params, "ndcg_eval_at", &tmp_str)) {
@@ -267,14 +267,14 @@ void MetricConfig::Set(const std::unordered_map<std::string, std::string>& param
 void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params) {
 GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
-GetFloat(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
+GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
 CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
 GetInt(params, "num_leaves", &num_leaves);
 CHECK(num_leaves > 1);
 GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
-GetFloat(params, "feature_fraction", &feature_fraction);
+GetDouble(params, "feature_fraction", &feature_fraction);
 CHECK(feature_fraction > 0.0f && feature_fraction <= 1.0f);
-GetFloat(params, "histogram_pool_size", &histogram_pool_size);
+GetDouble(params, "histogram_pool_size", &histogram_pool_size);
 GetInt(params, "max_depth", &max_depth);
CHECK(max_depth > 1 || max_depth < 0); CHECK(max_depth > 1 || max_depth < 0);
} }
...@@ -286,9 +286,9 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par ...@@ -286,9 +286,9 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
GetInt(params, "bagging_seed", &bagging_seed); GetInt(params, "bagging_seed", &bagging_seed);
GetInt(params, "bagging_freq", &bagging_freq); GetInt(params, "bagging_freq", &bagging_freq);
CHECK(bagging_freq >= 0); CHECK(bagging_freq >= 0);
GetFloat(params, "bagging_fraction", &bagging_fraction); GetDouble(params, "bagging_fraction", &bagging_fraction);
CHECK(bagging_fraction > 0.0f && bagging_fraction <= 1.0f); CHECK(bagging_fraction > 0.0f && bagging_fraction <= 1.0f);
GetFloat(params, "learning_rate", &learning_rate); GetDouble(params, "learning_rate", &learning_rate);
CHECK(learning_rate > 0.0f); CHECK(learning_rate > 0.0f);
GetInt(params, "early_stopping_round", &early_stopping_round); GetInt(params, "early_stopping_round", &early_stopping_round);
CHECK(early_stopping_round >= 0); CHECK(early_stopping_round >= 0);
......
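Both ObjectiveConfig::Set and MetricConfig::Set fall back to the same default when no label_gain is supplied: gain(i) = 2^i - 1, capped at 31 labels so the left shift stays inside a 32-bit int. A minimal sketch of building that default table:

#include <iostream>
#include <vector>

int main() {
  // Default label gains used when "label_gain" is not given:
  // gain(i) = 2^i - 1; stop at 31 labels so (1 << i) stays within int range.
  const int max_label = 31;
  std::vector<double> label_gain;
  label_gain.push_back(0.0);
  for (int i = 1; i < max_label; ++i) {
    label_gain.push_back(static_cast<double>((1 << i) - 1));
  }
  std::cout << label_gain[1] << " " << label_gain[2] << " " << label_gain[5] << std::endl;  // 1 3 31
  return 0;
}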
...@@ -364,10 +364,10 @@ void Dataset::SetField(const char* field_name, const void* field_data, data_size ...@@ -364,10 +364,10 @@ void Dataset::SetField(const char* field_name, const void* field_data, data_size
void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<std::string>& sample_data) { void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<std::string>& sample_data) {
// sample_values[i][j], means the value of j-th sample on i-th feature // sample_values[i][j], means the value of j-th sample on i-th feature
std::vector<std::vector<float>> sample_values; std::vector<std::vector<double>> sample_values;
// temp buffer for one line features and label // temp buffer for one line features and label
std::vector<std::pair<int, float>> oneline_features; std::vector<std::pair<int, double>> oneline_features;
float label; double label;
for (size_t i = 0; i < sample_data.size(); ++i) { for (size_t i = 0; i < sample_data.size(); ++i) {
oneline_features.clear(); oneline_features.clear();
// parse features // parse features
...@@ -376,7 +376,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector< ...@@ -376,7 +376,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
for (auto& feature_values : sample_values) { for (auto& feature_values : sample_values) {
feature_values.push_back(0.0); feature_values.push_back(0.0);
} }
for (std::pair<int, float>& inner_data : oneline_features) { for (std::pair<int, double>& inner_data : oneline_features) {
if (static_cast<size_t>(inner_data.first) >= sample_values.size()) { if (static_cast<size_t>(inner_data.first) >= sample_values.size()) {
// if need expand feature set // if need expand feature set
size_t need_size = inner_data.first - sample_values.size() + 1; size_t need_size = inner_data.first - sample_values.size() + 1;
...@@ -571,6 +571,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo ...@@ -571,6 +571,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
} }
used_feature_map_ = train_set->used_feature_map_; used_feature_map_ = train_set->used_feature_map_;
num_features_ = static_cast<int>(features_.size()); num_features_ = static_cast<int>(features_.size());
num_total_features_ = train_set->num_total_features_;
feature_names_ = train_set->feature_names_;
// extract features // extract features
ExtractFeaturesFromMemory(); ExtractFeaturesFromMemory();
} else { } else {
...@@ -585,6 +587,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo ...@@ -585,6 +587,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
} }
used_feature_map_ = train_set->used_feature_map_; used_feature_map_ = train_set->used_feature_map_;
num_features_ = static_cast<int>(features_.size()); num_features_ = static_cast<int>(features_.size());
num_total_features_ = train_set->num_total_features_;
feature_names_ = train_set->feature_names_;
// extract features // extract features
ExtractFeaturesFromFile(); ExtractFeaturesFromFile();
} }
...@@ -601,8 +605,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo ...@@ -601,8 +605,8 @@ void Dataset::LoadValidationData(const Dataset* train_set, bool use_two_round_lo
} }
void Dataset::ExtractFeaturesFromMemory() { void Dataset::ExtractFeaturesFromMemory() {
std::vector<std::pair<int, float>> oneline_features; std::vector<std::pair<int, double>> oneline_features;
float tmp_label = 0.0f; double tmp_label = 0.0f;
if (predict_fun_ == nullptr) { if (predict_fun_ == nullptr) {
// if doesn't need to prediction with initial model // if doesn't need to prediction with initial model
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label) #pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
...@@ -612,7 +616,7 @@ void Dataset::ExtractFeaturesFromMemory() { ...@@ -612,7 +616,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// parser // parser
parser_->ParseOneLine(text_reader_->Lines()[i].c_str(), &oneline_features, &tmp_label); parser_->ParseOneLine(text_reader_->Lines()[i].c_str(), &oneline_features, &tmp_label);
// set label // set label
metadata_.SetLabelAt(i, tmp_label); metadata_.SetLabelAt(i, static_cast<float>(tmp_label));
// free processed line: // free processed line:
text_reader_->Lines()[i].clear(); text_reader_->Lines()[i].clear();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
...@@ -626,9 +630,9 @@ void Dataset::ExtractFeaturesFromMemory() { ...@@ -626,9 +630,9 @@ void Dataset::ExtractFeaturesFromMemory() {
} }
else { else {
if (inner_data.first == weight_idx_) { if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(i, inner_data.second); metadata_.SetWeightAt(i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) { } else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(i, inner_data.second); metadata_.SetQueryAt(i, static_cast<float>(inner_data.second));
} }
} }
} }
...@@ -645,7 +649,7 @@ void Dataset::ExtractFeaturesFromMemory() { ...@@ -645,7 +649,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// set initial score // set initial score
init_score[i] = static_cast<float>(predict_fun_(oneline_features)); init_score[i] = static_cast<float>(predict_fun_(oneline_features));
// set label // set label
metadata_.SetLabelAt(i, tmp_label); metadata_.SetLabelAt(i, static_cast<float>(tmp_label));
// free processed line: // free processed line:
text_reader_->Lines()[i].clear(); text_reader_->Lines()[i].clear();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
...@@ -659,9 +663,9 @@ void Dataset::ExtractFeaturesFromMemory() { ...@@ -659,9 +663,9 @@ void Dataset::ExtractFeaturesFromMemory() {
} }
else { else {
if (inner_data.first == weight_idx_) { if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(i, inner_data.second); metadata_.SetWeightAt(i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) { } else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(i, inner_data.second); metadata_.SetQueryAt(i, static_cast<float>(inner_data.second));
} }
} }
} }
...@@ -688,8 +692,8 @@ void Dataset::ExtractFeaturesFromFile() { ...@@ -688,8 +692,8 @@ void Dataset::ExtractFeaturesFromFile() {
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun = std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &init_score] [this, &init_score]
(data_size_t start_idx, const std::vector<std::string>& lines) { (data_size_t start_idx, const std::vector<std::string>& lines) {
std::vector<std::pair<int, float>> oneline_features; std::vector<std::pair<int, double>> oneline_features;
float tmp_label = 0.0f; double tmp_label = 0.0f;
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label) #pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) { for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
...@@ -701,7 +705,7 @@ void Dataset::ExtractFeaturesFromFile() { ...@@ -701,7 +705,7 @@ void Dataset::ExtractFeaturesFromFile() {
init_score[start_idx + i] = static_cast<float>(predict_fun_(oneline_features)); init_score[start_idx + i] = static_cast<float>(predict_fun_(oneline_features));
} }
// set label // set label
metadata_.SetLabelAt(start_idx + i, tmp_label); metadata_.SetLabelAt(start_idx + i, static_cast<float>(tmp_label));
// push data // push data
for (auto& inner_data : oneline_features) { for (auto& inner_data : oneline_features) {
int feature_idx = used_feature_map_[inner_data.first]; int feature_idx = used_feature_map_[inner_data.first];
...@@ -711,9 +715,9 @@ void Dataset::ExtractFeaturesFromFile() { ...@@ -711,9 +715,9 @@ void Dataset::ExtractFeaturesFromFile() {
} }
else { else {
if (inner_data.first == weight_idx_) { if (inner_data.first == weight_idx_) {
metadata_.SetWeightAt(start_idx + i, inner_data.second); metadata_.SetWeightAt(start_idx + i, static_cast<float>(inner_data.second));
} else if (inner_data.first == group_idx_) { } else if (inner_data.first == group_idx_) {
metadata_.SetQueryAt(start_idx + i, inner_data.second); metadata_.SetQueryAt(start_idx + i, static_cast<float>(inner_data.second));
} }
} }
} }
...@@ -763,7 +767,11 @@ void Dataset::SaveBinaryFile(const char* bin_filename) { ...@@ -763,7 +767,11 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
// get size of header // get size of header
size_t size_of_header = sizeof(global_num_data_) + sizeof(is_enable_sparse_) size_t size_of_header = sizeof(global_num_data_) + sizeof(is_enable_sparse_)
+ sizeof(max_bin_) + sizeof(num_data_) + sizeof(num_features_) + sizeof(size_t) + sizeof(int) * used_feature_map_.size(); + sizeof(max_bin_) + sizeof(num_data_) + sizeof(num_features_) + sizeof(num_total_features_) +sizeof(size_t) + sizeof(int) * used_feature_map_.size();
// size of feature names
for (int i = 0; i < num_total_features_; ++i) {
size_of_header += feature_names_[i].size() + sizeof(int);
}
fwrite(&size_of_header, sizeof(size_of_header), 1, file); fwrite(&size_of_header, sizeof(size_of_header), 1, file);
// write header // write header
fwrite(&global_num_data_, sizeof(global_num_data_), 1, file); fwrite(&global_num_data_, sizeof(global_num_data_), 1, file);
...@@ -771,10 +779,19 @@ void Dataset::SaveBinaryFile(const char* bin_filename) { ...@@ -771,10 +779,19 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
fwrite(&max_bin_, sizeof(max_bin_), 1, file); fwrite(&max_bin_, sizeof(max_bin_), 1, file);
fwrite(&num_data_, sizeof(num_data_), 1, file); fwrite(&num_data_, sizeof(num_data_), 1, file);
fwrite(&num_features_, sizeof(num_features_), 1, file); fwrite(&num_features_, sizeof(num_features_), 1, file);
fwrite(&num_total_features_, sizeof(num_features_), 1, file);
size_t num_used_feature_map = used_feature_map_.size(); size_t num_used_feature_map = used_feature_map_.size();
fwrite(&num_used_feature_map, sizeof(num_used_feature_map), 1, file); fwrite(&num_used_feature_map, sizeof(num_used_feature_map), 1, file);
fwrite(used_feature_map_.data(), sizeof(int), num_used_feature_map, file); fwrite(used_feature_map_.data(), sizeof(int), num_used_feature_map, file);
// write feature names
for (int i = 0; i < num_total_features_; ++i) {
int str_len = static_cast<int>(feature_names_[i].size());
fwrite(&str_len, sizeof(int), 1, file);
const char* c_str = feature_names_[i].c_str();
fwrite(c_str, sizeof(char), str_len, file);
}
// get size of meta data // get size of meta data
size_t size_of_metadata = metadata_.SizesInByte(); size_t size_of_metadata = metadata_.SizesInByte();
fwrite(&size_of_metadata, sizeof(size_of_metadata), 1, file); fwrite(&size_of_metadata, sizeof(size_of_metadata), 1, file);
...@@ -864,6 +881,8 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma ...@@ -864,6 +881,8 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
mem_ptr += sizeof(num_data_); mem_ptr += sizeof(num_data_);
num_features_ = *(reinterpret_cast<const int*>(mem_ptr)); num_features_ = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(num_features_); mem_ptr += sizeof(num_features_);
num_total_features_ = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(num_total_features_);
size_t num_used_feature_map = *(reinterpret_cast<const size_t*>(mem_ptr)); size_t num_used_feature_map = *(reinterpret_cast<const size_t*>(mem_ptr));
mem_ptr += sizeof(num_used_feature_map); mem_ptr += sizeof(num_used_feature_map);
const int* tmp_feature_map = reinterpret_cast<const int*>(mem_ptr); const int* tmp_feature_map = reinterpret_cast<const int*>(mem_ptr);
...@@ -871,6 +890,21 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma ...@@ -871,6 +890,21 @@ void Dataset::LoadDataFromBinFile(const char* bin_filename, int rank, int num_ma
for (size_t i = 0; i < num_used_feature_map; ++i) { for (size_t i = 0; i < num_used_feature_map; ++i) {
used_feature_map_.push_back(tmp_feature_map[i]); used_feature_map_.push_back(tmp_feature_map[i]);
} }
mem_ptr += sizeof(int) * num_used_feature_map;
// get feature names
feature_names_.clear();
// read feature names
for (int i = 0; i < num_total_features_; ++i) {
int str_len = *(reinterpret_cast<const int*>(mem_ptr));
mem_ptr += sizeof(int);
std::stringstream str_buf;
for (int j = 0; j < str_len; ++j) {
char tmp_char = *(reinterpret_cast<const char*>(mem_ptr));
mem_ptr += sizeof(char);
str_buf << tmp_char;
}
feature_names_.emplace_back(str_buf.str());
}
// read size of meta data // read size of meta data
read_cnt = fread(buffer, sizeof(size_t), 1, file); read_cnt = fread(buffer, sizeof(size_t), 1, file);
......
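SaveBinaryFile now serializes each feature name as a length-prefixed string (an int length followed by the raw characters), and LoadDataFromBinFile walks a raw pointer over the header to read them back. A minimal in-memory sketch of the same round trip, using a std::string as a stand-in for the mapped file buffer:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> feature_names = {"f0", "price", "age"};

  // Write: int length, then the raw characters (same layout as the header above).
  std::string buffer;
  for (const auto& name : feature_names) {
    int str_len = static_cast<int>(name.size());
    buffer.append(reinterpret_cast<const char*>(&str_len), sizeof(int));
    buffer.append(name.data(), str_len);
  }

  // Read back: advance a raw pointer over the buffer, as the loading code does.
  const char* mem_ptr = buffer.data();
  std::vector<std::string> restored;
  for (size_t i = 0; i < feature_names.size(); ++i) {
    int str_len = 0;
    std::memcpy(&str_len, mem_ptr, sizeof(int));
    mem_ptr += sizeof(int);
    restored.emplace_back(mem_ptr, str_len);
    mem_ptr += str_len;
  }

  for (const auto& name : restored) std::cout << name << std::endl;
  return 0;
}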
...@@ -281,9 +281,9 @@ void Metadata::LoadWeights() { ...@@ -281,9 +281,9 @@ void Metadata::LoadWeights() {
num_weights_ = static_cast<data_size_t>(reader.Lines().size()); num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = new float[num_weights_]; weights_ = new float[num_weights_];
for (data_size_t i = 0; i < num_weights_; ++i) { for (data_size_t i = 0; i < num_weights_; ++i) {
float tmp_weight = 0.0f; double tmp_weight = 0.0f;
Common::Atof(reader.Lines()[i].c_str(), &tmp_weight); Common::Atof(reader.Lines()[i].c_str(), &tmp_weight);
weights_[i] = tmp_weight; weights_[i] = static_cast<float>(tmp_weight);
} }
} }
...@@ -296,10 +296,10 @@ void Metadata::LoadInitialScore() { ...@@ -296,10 +296,10 @@ void Metadata::LoadInitialScore() {
Log::Info("Start loading initial scores"); Log::Info("Start loading initial scores");
num_init_score_ = static_cast<data_size_t>(reader.Lines().size()); num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
init_score_ = new float[num_init_score_]; init_score_ = new float[num_init_score_];
float tmp = 0.0f; double tmp = 0.0f;
for (data_size_t i = 0; i < num_init_score_; ++i) { for (data_size_t i = 0; i < num_init_score_; ++i) {
Common::Atof(reader.Lines()[i].c_str(), &tmp); Common::Atof(reader.Lines()[i].c_str(), &tmp);
init_score_[i] = tmp; init_score_[i] = static_cast<float>(tmp);
} }
} }
......
...@@ -18,9 +18,9 @@ public: ...@@ -18,9 +18,9 @@ public:
:label_idx_(label_idx) { :label_idx_(label_idx) {
} }
inline void ParseOneLine(const char* str, inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override { std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0; int idx = 0;
float val = 0.0f; double val = 0.0f;
int bias = 0; int bias = 0;
*out_label = 0.0f; *out_label = 0.0f;
while (*str != '\0') { while (*str != '\0') {
...@@ -50,9 +50,9 @@ public: ...@@ -50,9 +50,9 @@ public:
:label_idx_(label_idx) { :label_idx_(label_idx) {
} }
inline void ParseOneLine(const char* str, inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override { std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0; int idx = 0;
float val = 0.0f; double val = 0.0f;
int bias = 0; int bias = 0;
while (*str != '\0') { while (*str != '\0') {
str = Common::Atof(str, &val); str = Common::Atof(str, &val);
...@@ -83,9 +83,9 @@ public: ...@@ -83,9 +83,9 @@ public:
} }
} }
inline void ParseOneLine(const char* str, inline void ParseOneLine(const char* str,
std::vector<std::pair<int, float>>* out_features, float* out_label) const override { std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
int idx = 0; int idx = 0;
float val = 0.0f; double val = 0.0f;
if (label_idx_ == 0) { if (label_idx_ == 0) {
str = Common::Atof(str, &val); str = Common::Atof(str, &val);
*out_label = val; *out_label = val;
......
...@@ -23,11 +23,11 @@ Tree::Tree(int max_leaves) ...@@ -23,11 +23,11 @@ Tree::Tree(int max_leaves)
split_feature_ = new int[max_leaves_ - 1]; split_feature_ = new int[max_leaves_ - 1];
split_feature_real_ = new int[max_leaves_ - 1]; split_feature_real_ = new int[max_leaves_ - 1];
threshold_in_bin_ = new unsigned int[max_leaves_ - 1]; threshold_in_bin_ = new unsigned int[max_leaves_ - 1];
threshold_ = new float[max_leaves_ - 1]; threshold_ = new double[max_leaves_ - 1];
split_gain_ = new float[max_leaves_ - 1]; split_gain_ = new double[max_leaves_ - 1];
leaf_parent_ = new int[max_leaves_]; leaf_parent_ = new int[max_leaves_];
leaf_value_ = new float[max_leaves_]; leaf_value_ = new double[max_leaves_];
leaf_depth_ = new int[max_leaves_]; leaf_depth_ = new int[max_leaves_];
// root is in the depth 1 // root is in the depth 1
leaf_depth_[0] = 1; leaf_depth_[0] = 1;
...@@ -48,7 +48,7 @@ Tree::~Tree() { ...@@ -48,7 +48,7 @@ Tree::~Tree() {
} }
int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature, int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
float threshold, float left_value, float right_value, float gain) { double threshold, double left_value, double right_value, double gain) {
int new_node_idx = num_leaves_ - 1; int new_node_idx = num_leaves_ - 1;
// update parent info // update parent info
int parent = leaf_parent_[leaf]; int parent = leaf_parent_[leaf];
...@@ -89,7 +89,7 @@ void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score ...@@ -89,7 +89,7 @@ void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(start)); iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(start));
} }
for (data_size_t i = start; i < end; ++i) { for (data_size_t i = start; i < end; ++i) {
score[i] += leaf_value_[GetLeaf(iterators, i)]; score[i] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, i)]);
} }
}); });
} }
...@@ -103,7 +103,7 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat ...@@ -103,7 +103,7 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start])); iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start]));
} }
for (data_size_t i = start; i < end; ++i) { for (data_size_t i = start; i < end; ++i) {
score[used_data_indices[i]] += leaf_value_[GetLeaf(iterators, used_data_indices[i])]; score[used_data_indices[i]] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, used_data_indices[i])]);
} }
}); });
} }
...@@ -114,9 +114,9 @@ std::string Tree::ToString() { ...@@ -114,9 +114,9 @@ std::string Tree::ToString() {
ss << "split_feature=" ss << "split_feature="
<< Common::ArrayToString<int>(split_feature_real_, num_leaves_ - 1, ' ') << std::endl; << Common::ArrayToString<int>(split_feature_real_, num_leaves_ - 1, ' ') << std::endl;
ss << "split_gain=" ss << "split_gain="
<< Common::ArrayToString<float>(split_gain_, num_leaves_ - 1, ' ') << std::endl; << Common::ArrayToString<double>(split_gain_, num_leaves_ - 1, ' ') << std::endl;
ss << "threshold=" ss << "threshold="
<< Common::ArrayToString<float>(threshold_, num_leaves_ - 1, ' ') << std::endl; << Common::ArrayToString<double>(threshold_, num_leaves_ - 1, ' ') << std::endl;
ss << "left_child=" ss << "left_child="
<< Common::ArrayToString<int>(left_child_, num_leaves_ - 1, ' ') << std::endl; << Common::ArrayToString<int>(left_child_, num_leaves_ - 1, ' ') << std::endl;
ss << "right_child=" ss << "right_child="
...@@ -124,7 +124,7 @@ std::string Tree::ToString() { ...@@ -124,7 +124,7 @@ std::string Tree::ToString() {
ss << "leaf_parent=" ss << "leaf_parent="
<< Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl; << Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
ss << "leaf_value=" ss << "leaf_value="
<< Common::ArrayToString<float>(leaf_value_, num_leaves_, ' ') << std::endl; << Common::ArrayToString<double>(leaf_value_, num_leaves_, ' ') << std::endl;
ss << std::endl; ss << std::endl;
return ss.str(); return ss.str();
} }
...@@ -154,10 +154,10 @@ Tree::Tree(const std::string& str) { ...@@ -154,10 +154,10 @@ Tree::Tree(const std::string& str) {
left_child_ = new int[num_leaves_ - 1]; left_child_ = new int[num_leaves_ - 1];
right_child_ = new int[num_leaves_ - 1]; right_child_ = new int[num_leaves_ - 1];
split_feature_real_ = new int[num_leaves_ - 1]; split_feature_real_ = new int[num_leaves_ - 1];
threshold_ = new float[num_leaves_ - 1]; threshold_ = new double[num_leaves_ - 1];
split_gain_ = new float[num_leaves_ - 1]; split_gain_ = new double[num_leaves_ - 1];
leaf_parent_ = new int[num_leaves_]; leaf_parent_ = new int[num_leaves_];
leaf_value_ = new float[num_leaves_]; leaf_value_ = new double[num_leaves_];
split_feature_ = nullptr; split_feature_ = nullptr;
threshold_in_bin_ = nullptr; threshold_in_bin_ = nullptr;
...@@ -165,9 +165,9 @@ Tree::Tree(const std::string& str) { ...@@ -165,9 +165,9 @@ Tree::Tree(const std::string& str) {
Common::StringToIntArray(key_vals["split_feature"], ' ', Common::StringToIntArray(key_vals["split_feature"], ' ',
num_leaves_ - 1, split_feature_real_); num_leaves_ - 1, split_feature_real_);
Common::StringToFloatArray(key_vals["split_gain"], ' ', Common::StringToDoubleArray(key_vals["split_gain"], ' ',
num_leaves_ - 1, split_gain_); num_leaves_ - 1, split_gain_);
Common::StringToFloatArray(key_vals["threshold"], ' ', Common::StringToDoubleArray(key_vals["threshold"], ' ',
num_leaves_ - 1, threshold_); num_leaves_ - 1, threshold_);
Common::StringToIntArray(key_vals["left_child"], ' ', Common::StringToIntArray(key_vals["left_child"], ' ',
num_leaves_ - 1, left_child_); num_leaves_ - 1, left_child_);
...@@ -175,7 +175,7 @@ Tree::Tree(const std::string& str) { ...@@ -175,7 +175,7 @@ Tree::Tree(const std::string& str) {
num_leaves_ - 1, right_child_); num_leaves_ - 1, right_child_);
Common::StringToIntArray(key_vals["leaf_parent"], ' ', Common::StringToIntArray(key_vals["leaf_parent"], ' ',
num_leaves_ , leaf_parent_); num_leaves_ , leaf_parent_);
Common::StringToFloatArray(key_vals["leaf_value"], ' ', Common::StringToDoubleArray(key_vals["leaf_value"], ' ',
num_leaves_ , leaf_value_); num_leaves_ , leaf_value_);
} }
......
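Tree::ToString emits one key=value line per array (split_feature, split_gain, threshold, ..., leaf_value) with space-separated elements, and the string constructor parses them back via Common::StringToIntArray / StringToDoubleArray. A minimal sketch of parsing one such line with plain stream extraction instead of the library helpers (ParseDoubleLine is illustrative):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Parse a space-separated list of doubles, the format used for lines such as
// "leaf_value=0.12 -0.03 0.5" in the tree's text dump.
std::vector<double> ParseDoubleLine(const std::string& line) {
  std::vector<double> out;
  std::istringstream ss(line.substr(line.find('=') + 1));
  double v;
  while (ss >> v) out.push_back(v);
  return out;
}

int main() {
  auto leaf_value = ParseDoubleLine("leaf_value=0.12 -0.03 0.5");
  for (double v : leaf_value) std::cout << v << " ";
  std::cout << std::endl;  // 0.12 -0.03 0.5
  return 0;
}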
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
if (weights_ == nullptr) { if (weights_ == nullptr) {
sum_weights_ = static_cast<float>(num_data_); sum_weights_ = static_cast<double>(num_data_);
} else { } else {
sum_weights_ = 0.0f; sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
...@@ -58,8 +58,8 @@ public: ...@@ -58,8 +58,8 @@ public:
return false; return false;
} }
std::vector<float> Eval(const score_t* score) const override { std::vector<double> Eval(const score_t* score) const override {
score_t sum_loss = 0.0f; double sum_loss = 0.0f;
if (weights_ == nullptr) { if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss) #pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
...@@ -77,8 +77,8 @@ public: ...@@ -77,8 +77,8 @@ public:
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i]; sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob) * weights_[i];
} }
} }
score_t loss = sum_loss / sum_weights_; double loss = sum_loss / sum_weights_;
return std::vector<float>(1, static_cast<float>(loss)); return std::vector<double>(1, loss);
} }
private: private:
...@@ -89,7 +89,7 @@ private: ...@@ -89,7 +89,7 @@ private:
/*! \brief Pointer of weighs */ /*! \brief Pointer of weighs */
const float* weights_; const float* weights_;
/*! \brief Sum weights */ /*! \brief Sum weights */
float sum_weights_; double sum_weights_;
/*! \brief Name of test set */ /*! \brief Name of test set */
std::string name_; std::string name_;
/*! \brief Sigmoid parameter */ /*! \brief Sigmoid parameter */
...@@ -172,7 +172,7 @@ public: ...@@ -172,7 +172,7 @@ public:
weights_ = metadata.weights(); weights_ = metadata.weights();
if (weights_ == nullptr) { if (weights_ == nullptr) {
sum_weights_ = static_cast<float>(num_data_); sum_weights_ = static_cast<double>(num_data_);
} else { } else {
sum_weights_ = 0.0f; sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < num_data; ++i) {
...@@ -181,7 +181,7 @@ public: ...@@ -181,7 +181,7 @@ public:
} }
} }
std::vector<float> Eval(const score_t* score) const override { std::vector<double> Eval(const score_t* score) const override {
// get indices sorted by score, descent order // get indices sorted by score, descent order
std::vector<data_size_t> sorted_idx; std::vector<data_size_t> sorted_idx;
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
...@@ -189,13 +189,13 @@ public: ...@@ -189,13 +189,13 @@ public:
} }
std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; }); std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
// temp sum of postive label // temp sum of postive label
score_t cur_pos = 0.0f; double cur_pos = 0.0f;
// total sum of postive label // total sum of postive label
score_t sum_pos = 0.0f; double sum_pos = 0.0f;
// accumlate of auc // accumlate of auc
score_t accum = 0.0f; double accum = 0.0f;
// temp sum of negative label // temp sum of negative label
score_t cur_neg = 0.0f; double cur_neg = 0.0f;
score_t threshold = score[sorted_idx[0]]; score_t threshold = score[sorted_idx[0]];
if (weights_ == nullptr) { // no weights if (weights_ == nullptr) { // no weights
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
...@@ -233,11 +233,11 @@ public: ...@@ -233,11 +233,11 @@ public:
} }
accum += cur_neg*(cur_pos * 0.5f + sum_pos); accum += cur_neg*(cur_pos * 0.5f + sum_pos);
sum_pos += cur_pos; sum_pos += cur_pos;
score_t auc = 1.0f; double auc = 1.0f;
if (sum_pos > 0.0f && sum_pos != sum_weights_) { if (sum_pos > 0.0f && sum_pos != sum_weights_) {
auc = accum / (sum_pos *(sum_weights_ - sum_pos)); auc = accum / (sum_pos *(sum_weights_ - sum_pos));
} }
return std::vector<float>(1, static_cast<float>(auc)); return std::vector<double>(1, auc);
} }
private: private:
...@@ -248,7 +248,7 @@ private: ...@@ -248,7 +248,7 @@ private:
/*! \brief Pointer of weighs */ /*! \brief Pointer of weighs */
const float* weights_; const float* weights_;
/*! \brief Sum weights */ /*! \brief Sum weights */
float sum_weights_; double sum_weights_;
/*! \brief Name of test set */ /*! \brief Name of test set */
std::string name_; std::string name_;
}; };
......
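The AUC metric above walks the data in descending score order, flushes the tied block each time the threshold changes, gives tied pairs half credit via cur_pos * 0.5, and finally divides the accumulated area by sum_pos * (sum_weights_ - sum_pos). A minimal unweighted re-implementation of the same accumulation (the Auc function below is a standalone sketch, not the library's class):

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

// Unweighted AUC: sort by score descending, flush (cur_pos, cur_neg) whenever the
// threshold changes, and count tied pairs as half via cur_pos * 0.5.
double Auc(const std::vector<double>& score, const std::vector<int>& label) {
  const size_t n = score.size();
  std::vector<size_t> sorted_idx(n);
  std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
  std::sort(sorted_idx.begin(), sorted_idx.end(),
            [&score](size_t a, size_t b) { return score[a] > score[b]; });
  double cur_pos = 0.0, cur_neg = 0.0, sum_pos = 0.0, accum = 0.0;
  double threshold = score[sorted_idx[0]];
  for (size_t i = 0; i < n; ++i) {
    const size_t idx = sorted_idx[i];
    if (score[idx] != threshold) {  // new threshold: flush the tied block
      threshold = score[idx];
      accum += cur_neg * (cur_pos * 0.5 + sum_pos);
      sum_pos += cur_pos;
      cur_pos = cur_neg = 0.0;
    }
    if (label[idx] > 0) cur_pos += 1.0; else cur_neg += 1.0;
  }
  accum += cur_neg * (cur_pos * 0.5 + sum_pos);
  sum_pos += cur_pos;
  const double sum_neg = static_cast<double>(n) - sum_pos;
  if (sum_pos == 0.0 || sum_neg == 0.0) return 1.0;  // degenerate case, as above
  return accum / (sum_pos * sum_neg);
}

int main() {
  std::cout << Auc({0.9, 0.8, 0.3, 0.1}, {1, 0, 1, 0}) << std::endl;  // 0.75
  return 0;
}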