"python-package/vscode:/vscode.git/clone" did not exist on "0c4bb89de578a02b7822ba2bc6af2947e1c8b527"
Commit 3e405f2c authored by Guolin Ke's avatar Guolin Ke
Browse files

Bug fixed for #17.

parent d6d4a1d2
......@@ -267,27 +267,30 @@ public:
* \param sparse_rate Sparse rate of this bin (num_bin0 / num_data)
* \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will set to true if this bin is sparse
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateBin(data_size_t num_data, int num_bin,
double sparse_rate, bool is_enable_sparse, bool* is_sparse);
double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
/*!
* \brief Create object for bin data of one feature, used for dense feature
* \param num_data Total number of data
* \param num_bin Number of bin
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateDenseBin(data_size_t num_data, int num_bin);
static Bin* CreateDenseBin(data_size_t num_data, int num_bin, int default_bin);
/*!
* \brief Create object for bin data of one feature, used for sparse feature
* \param num_data Total number of data
* \param num_bin Number of bin
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateSparseBin(data_size_t num_data,
int num_bin);
int num_bin, int default_bin);
};
inline unsigned int BinMapper::ValueToBin(double value) const {
......
......@@ -27,7 +27,7 @@ public:
:bin_mapper_(bin_mapper) {
feature_index_ = feature_idx;
bin_data_ = Bin::CreateBin(num_data, bin_mapper_->num_bin(),
bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_);
bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_, bin_mapper_->ValueToBin(0));
}
/*!
* \brief Constructor from memory
......@@ -52,9 +52,9 @@ public:
num_data = static_cast<data_size_t>(local_used_indices.size());
}
if (is_sparse_) {
bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin());
bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
} else {
bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin());
bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
}
// get bin data
bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
......
......@@ -182,35 +182,35 @@ template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>;
// Chooses the storage layout for one feature's bin data and creates it.
// A sparse bin is created when sparse_rate reaches kSparseThreshold and
// is_enable_sparse is true; otherwise a dense bin is created. The decision
// is reported to the caller through *is_sparse, and default_bin is forwarded
// unchanged to the chosen factory.
// NOTE(review): this span looks like a rendered diff hunk - the signature and
// each return appear twice, without and with default_bin - confirm against
// the real file before relying on either form.
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse) {
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin) {
// features whose sparse_rate is at least this value are stored sparsely
const double kSparseThreshold = 0.8;
if (sparse_rate >= kSparseThreshold && is_enable_sparse) {
*is_sparse = true;
return CreateSparseBin(num_data, num_bin);
return CreateSparseBin(num_data, num_bin, default_bin);
} else {
*is_sparse = false;
return CreateDenseBin(num_data, num_bin);
return CreateDenseBin(num_data, num_bin, default_bin);
}
}
// Creates a dense bin whose element type is the narrowest of
// uint8_t / uint16_t / uint32_t selected by num_bin: up to 256 bins use
// uint8_t, up to 65536 use uint16_t, anything larger uses uint32_t.
// The object is allocated with `new`; presumably the caller takes ownership
// (TODO confirm against callers).
// NOTE(review): this span looks like a rendered diff hunk - the signature and
// each return appear twice, without and with default_bin.
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) {
return new DenseBin<uint8_t>(num_data);
return new DenseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) {
return new DenseBin<uint16_t>(num_data);
return new DenseBin<uint16_t>(num_data, default_bin);
} else {
return new DenseBin<uint32_t>(num_data);
return new DenseBin<uint32_t>(num_data, default_bin);
}
}
// Creates a sparse bin whose element type is the narrowest of
// uint8_t / uint16_t / uint32_t selected by num_bin: up to 256 bins use
// uint8_t, up to 65536 use uint16_t, anything larger uses uint32_t.
// The object is allocated with `new`; presumably the caller takes ownership
// (TODO confirm against callers).
// NOTE(review): this span looks like a rendered diff hunk - the signature and
// each return appear twice, without and with default_bin.
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) {
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin, int default_bin) {
if (num_bin <= 256) {
return new SparseBin<uint8_t>(num_data);
return new SparseBin<uint8_t>(num_data, default_bin);
} else if (num_bin <= 65536) {
return new SparseBin<uint16_t>(num_data);
return new SparseBin<uint16_t>(num_data, default_bin);
} else {
return new SparseBin<uint32_t>(num_data);
return new SparseBin<uint32_t>(num_data, default_bin);
}
}
......
......@@ -189,7 +189,7 @@ void Dataset::ConstructBinMappers(int rank, int num_machines, const std::vector<
// -1 means doesn't use this feature
used_feature_map_ = std::vector<int>(sample_values.size(), -1);
num_total_features_ = sample_values.size();
num_total_features_ = static_cast<int>(sample_values.size());
// start find bins
if (num_machines == 1) {
std::vector<BinMapper*> bin_mappers(sample_values.size());
......
......@@ -16,10 +16,17 @@ namespace LightGBM {
template <typename VAL_T>
class DenseBin: public Bin {
public:
// Allocates one VAL_T slot per data row and pre-fills the array.
// When default_bin is 0 the array is zeroed with memset; otherwise every
// slot is set to default_bin converted to VAL_T.
// NOTE(review): this span looks like a rendered diff hunk - the constructor
// signature appears twice, and the unconditional memset right after the
// allocation appears to be the pre-change line - confirm against the real file.
explicit DenseBin(data_size_t num_data)
explicit DenseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) {
data_ = new VAL_T[num_data_];
std::memset(data_, 0, sizeof(VAL_T)*num_data_);
if (default_bin == 0) {
// all-zero fill can be done bytewise
std::memset(data_, 0, sizeof(VAL_T)*num_data_);
} else {
// a non-zero fill value has to be written element by element
VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
for (data_size_t i = 0; i < num_data_; ++i) {
data_[i] = default_bin_T;
}
}
}
~DenseBin() {
......
......@@ -20,7 +20,9 @@ public:
double val = 0.0;
while (*str != '\0') {
str = Common::Atof(str, &val);
out_features->emplace_back(idx, val);
if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx;
if (*str == ',') {
++str;
......@@ -49,7 +51,9 @@ public:
double val = 0.0;
while (*str != '\0') {
str = Common::Atof(str, &val);
out_features->emplace_back(idx, val);
if (fabs(val) > 1e-10) {
out_features->emplace_back(idx, val);
}
++idx;
if (*str == '\t') {
++str;
......
......@@ -24,8 +24,12 @@ class SparseBin:public Bin {
public:
friend class SparseBinIterator<VAL_T>;
explicit SparseBin(data_size_t num_data)
explicit SparseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) {
default_bin_ = static_cast<VAL_T>(default_bin);
if (default_bin_ != 0) {
Log::Stdout("Warning: Having sparse feature with negative values. Will let negative values equal zero as well");
}
#pragma omp parallel
#pragma omp master
{
......@@ -41,7 +45,7 @@ public:
// Records one (row index, bin value) pair in push_buffers_[tid].
// Values that are zero, or that do not exceed default_bin_, are not stored.
// tid selects the buffer; presumably it is the calling thread's id (TODO confirm).
// NOTE(review): this span looks like a rendered diff hunk - the `value == 0`
// check appears to be the pre-change form of the `value <= default_bin_` check.
void Push(int tid, data_size_t idx, uint32_t value) override {
// do not store zero / default-bin data
if (value == 0) { return; }
if (value <= default_bin_) { return; }
push_buffers_[tid].emplace_back(idx, static_cast<VAL_T>(value));
}
......@@ -240,6 +244,7 @@ private:
std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_;
std::vector<std::pair<data_size_t, data_size_t>> fast_index_;
data_size_t fast_index_shift_;
VAL_T default_bin_;
};
template <typename VAL_T>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment