Commit 3e405f2c authored by Guolin Ke
Browse files

Bug fixed for #17.

parent d6d4a1d2
...@@ -267,27 +267,30 @@ public: ...@@ -267,27 +267,30 @@ public:
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data ) * \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \param is_enable_sparse True if enable sparse feature * \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will set to true if this bin is sparse * \param is_sparse Will set to true if this bin is sparse
* \param default_bin Default bin for zeros value
* \return The bin data object * \return The bin data object
*/ */
static Bin* CreateBin(data_size_t num_data, int num_bin, static Bin* CreateBin(data_size_t num_data, int num_bin,
double sparse_rate, bool is_enable_sparse, bool* is_sparse); double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
/*! /*!
* \brief Create object for bin data of one feature, used for dense feature * \brief Create object for bin data of one feature, used for dense feature
* \param num_data Total number of data * \param num_data Total number of data
* \param num_bin Number of bin * \param num_bin Number of bin
* \param default_bin Default bin for zeros value
* \return The bin data object * \return The bin data object
*/ */
static Bin* CreateDenseBin(data_size_t num_data, int num_bin); static Bin* CreateDenseBin(data_size_t num_data, int num_bin, int default_bin);
/*! /*!
* \brief Create object for bin data of one feature, used for sparse feature * \brief Create object for bin data of one feature, used for sparse feature
* \param num_data Total number of data * \param num_data Total number of data
* \param num_bin Number of bin * \param num_bin Number of bin
* \param default_bin Default bin for zeros value
* \return The bin data object * \return The bin data object
*/ */
static Bin* CreateSparseBin(data_size_t num_data, static Bin* CreateSparseBin(data_size_t num_data,
int num_bin); int num_bin, int default_bin);
}; };
inline unsigned int BinMapper::ValueToBin(double value) const { inline unsigned int BinMapper::ValueToBin(double value) const {
......
...@@ -27,7 +27,7 @@ public: ...@@ -27,7 +27,7 @@ public:
:bin_mapper_(bin_mapper) { :bin_mapper_(bin_mapper) {
feature_index_ = feature_idx; feature_index_ = feature_idx;
bin_data_ = Bin::CreateBin(num_data, bin_mapper_->num_bin(), bin_data_ = Bin::CreateBin(num_data, bin_mapper_->num_bin(),
bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_); bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_, bin_mapper_->ValueToBin(0));
} }
/*! /*!
* \brief Constructor from memory * \brief Constructor from memory
...@@ -52,9 +52,9 @@ public: ...@@ -52,9 +52,9 @@ public:
num_data = static_cast<data_size_t>(local_used_indices.size()); num_data = static_cast<data_size_t>(local_used_indices.size());
} }
if (is_sparse_) { if (is_sparse_) {
bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin()); bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
} else { } else {
bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin()); bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
} }
// get bin data // get bin data
bin_data_->LoadFromMemory(memory_ptr, local_used_indices); bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
......
...@@ -182,35 +182,35 @@ template class OrderedSparseBin<uint16_t>; ...@@ -182,35 +182,35 @@ template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>; template class OrderedSparseBin<uint32_t>;
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse) { Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
// sparse threshold // sparse threshold
const double kSparseThreshold = 0.8; const double kSparseThreshold = 0.8;
if (sparse_rate >= kSparseThreshold && is_enable_sparse) { if (sparse_rate >= kSparseThreshold && is_enable_sparse) {
*is_sparse = true; *is_sparse = true;
return CreateSparseBin(num_data, num_bin); return CreateSparseBin(num_data, num_bin, default_bin);
} else { } else {
*is_sparse = false; *is_sparse = false;
return CreateDenseBin(num_data, num_bin); return CreateDenseBin(num_data, num_bin, default_bin);
} }
} }
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) { Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) { if (num_bin <= 256) {
return new DenseBin<uint8_t>(num_data); return new DenseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) { } else if (num_bin <= 65536) {
return new DenseBin<uint16_t>(num_data); return new DenseBin<uint16_t>(num_data, default_bin);
} else { } else {
return new DenseBin<uint32_t>(num_data); return new DenseBin<uint32_t>(num_data, default_bin);
} }
} }
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) { Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) { if (num_bin <= 256) {
return new SparseBin<uint8_t>(num_data); return new SparseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) { } else if (num_bin <= 65536) {
return new SparseBin<uint16_t>(num_data); return new SparseBin<uint16_t>(num_data, default_bin);
} else { } else {
return new SparseBin<uint32_t>(num_data); return new SparseBin<uint32_t>(num_data, default_bin);
} }
} }
......
...@@ -189,7 +189,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector< ...@@ -189,7 +189,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
// -1 means doesn't use this feature // -1 means doesn't use this feature
used_feature_map_ = std::vector<int>(sample_values.size(), -1); used_feature_map_ = std::vector<int>(sample_values.size(), -1);
num_total_features_ = sample_values.size(); num_total_features_ = static_cast<int>(sample_values.size());
// start find bins // start find bins
if (num_machines == 1) { if (num_machines == 1) {
std::vector<BinMapper*> bin_mappers(sample_values.size()); std::vector<BinMapper*> bin_mappers(sample_values.size());
......
...@@ -16,10 +16,17 @@ namespace LightGBM { ...@@ -16,10 +16,17 @@ namespace LightGBM {
template <typename VAL_T> template <typename VAL_T>
class DenseBin: public Bin { class DenseBin: public Bin {
public: public:
explicit DenseBin(data_size_t num_data) explicit DenseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) { : num_data_(num_data) {
data_ = new VAL_T[num_data_]; data_ = new VAL_T[num_data_];
std::memset(data_, 0, sizeof(VAL_T)*num_data_); if (default_bin == 0) {
std::memset(data_, 0, sizeof(VAL_T)*num_data_);
} else {
VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
for (data_size_t i = 0; i < num_data_; ++i) {
data_[i] = default_bin_T;
}
}
} }
~DenseBin() { ~DenseBin() {
......
...@@ -20,7 +20,9 @@ public: ...@@ -20,7 +20,9 @@ public:
double val = 0.0; double val = 0.0;
while (*str != '\0') { while (*str != '\0') {
str = Common::Atof(str, &val); str = Common::Atof(str, &val);
out_features->emplace_back(idx, val); if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx; ++idx;
if (*str == ',') { if (*str == ',') {
++str; ++str;
...@@ -49,7 +51,9 @@ public: ...@@ -49,7 +51,9 @@ public:
double val = 0.0; double val = 0.0;
while (*str != '\0') { while (*str != '\0') {
str = Common::Atof(str, &val); str = Common::Atof(str, &val);
out_features->emplace_back(idx, val); if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx; ++idx;
if (*str == '\t') { if (*str == '\t') {
++str; ++str;
......
...@@ -24,8 +24,12 @@ class SparseBin:public Bin { ...@@ -24,8 +24,12 @@ class SparseBin:public Bin {
public: public:
friend class SparseBinIterator<VAL_T>; friend class SparseBinIterator<VAL_T>;
explicit SparseBin(data_size_t num_data) explicit SparseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) { : num_data_(num_data) {
default_bin_ = static_cast<VAL_T>(default_bin);
if (default_bin_ != 0) {
Log::Stdout("Warning: Having sparse feature with negative values. Will let negative values equal zero as well");
}
#pragma omp parallel #pragma omp parallel
#pragma omp master #pragma omp master
{ {
...@@ -41,7 +45,7 @@ public: ...@@ -41,7 +45,7 @@ public:
void Push(int tid, data_size_t idx, uint32_t value) override { void Push(int tid, data_size_t idx, uint32_t value) override {
// not store zero data // not store zero data
if (value == 0) { return; } if (value <= default_bin_) { return; }
push_buffers_[tid].emplace_back(idx, static_cast<VAL_T>(value)); push_buffers_[tid].emplace_back(idx, static_cast<VAL_T>(value));
} }
...@@ -240,6 +244,7 @@ private: ...@@ -240,6 +244,7 @@ private:
std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_; std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_;
std::vector<std::pair<data_size_t, data_size_t>> fast_index_; std::vector<std::pair<data_size_t, data_size_t>> fast_index_;
data_size_t fast_index_shift_; data_size_t fast_index_shift_;
VAL_T default_bin_;
}; };
template <typename VAL_T> template <typename VAL_T>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment