feature.h 4.07 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#ifndef LIGHTGBM_FEATURE_H_
#define LIGHTGBM_FEATURE_H_

#include <LightGBM/utils/random.h>

#include <LightGBM/meta.h>
#include <LightGBM/bin.h>

#include <cstdio>
#include <cstring>

#include <memory>
#include <vector>

namespace LightGBM {

/*! \brief Used to store data and provide some operations on one feature*/
class Feature {
public:
  /*!
  * \brief Constructor
  * \param feature_idx Index of this feature
  * \param bin_mapper Bin mapper for this feature
  * \param num_data Total number of data
  * \param is_enable_sparse True if enable sparse feature
  */
  Feature(int feature_idx, BinMapper* bin_mapper,
    data_size_t num_data, bool is_enable_sparse)
    :bin_mapper_(bin_mapper) {
    feature_index_ = feature_idx;
    bin_data_ = Bin::CreateBin(num_data, bin_mapper_->num_bin(),
      bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_);
  }
  /*!
  * \brief Constructor from memory
  * \param memory Pointer of memory
  * \param num_all_data Number of global data
  * \param local_used_indices Local used indices, empty means using all data
  */
  Feature(const void* memory, data_size_t num_all_data,
    const std::vector<data_size_t>& local_used_indices) {
    const char* memory_ptr = reinterpret_cast<const char*>(memory);
    // get featuer index
    feature_index_ = *(reinterpret_cast<const int*>(memory_ptr));
    memory_ptr += sizeof(feature_index_);
    // get is_sparse
    is_sparse_ = *(reinterpret_cast<const bool*>(memory_ptr));
    memory_ptr += sizeof(is_sparse_);
    // get bin mapper
    bin_mapper_ = new BinMapper(memory_ptr);
    memory_ptr += bin_mapper_->SizesInByte();
    data_size_t num_data = num_all_data;
    if (local_used_indices.size() > 0) {
      num_data = static_cast<data_size_t>(local_used_indices.size());
    }
    if (is_sparse_) {
      bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin());
    } else {
      bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin());
    }
    // get bin data
    bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
  }
  /*! \brief Destructor */
  ~Feature() {
    delete bin_mapper_;
    delete bin_data_;
  }

  /*!
  * \brief Push one record, will auto convert to bin and push to bin data
  * \param tid Thread id
  * \param idx Index of record
  * \param value feature value of record
  */
  inline void PushData(int tid, data_size_t line_idx, double value) {
    unsigned int bin = bin_mapper_->ValueToBin(value);
    bin_data_->Push(tid, line_idx, bin);
  }
  inline void FinishLoad() { bin_data_->FinishLoad(); }
  /*! \brief Index of this feature */
  inline int feature_index() const { return feature_index_; }
  /*! \brief Bin mapper that this feature used */
  inline const BinMapper* bin_mapper() const { return bin_mapper_; }
  /*! \brief Number of bin of this feature */
  inline int num_bin() const { return bin_mapper_->num_bin(); }
  /*! \brief Get bin data of this feature */
  inline const Bin* bin_data() const { return bin_data_; }
  /*!
  * \brief From bin to feature value
  * \param bin
  * \return Feature value of this bin
  */
  inline double BinToValue(unsigned int bin)
    const { return bin_mapper_->BinToValue(bin); }

  /*!
  * \brief Save binary data to file
  * \param file File want to write
  */
  void SaveBinaryToFile(FILE* file) const {
    fwrite(&feature_index_, sizeof(feature_index_), 1, file);
    fwrite(&is_sparse_, sizeof(is_sparse_), 1, file);
    bin_mapper_->SaveBinaryToFile(file);
    bin_data_->SaveBinaryToFile(file);
  }
  /*!
  * \brief Get sizes in byte of this object
  */
  size_t SizesInByte() const {
    return sizeof(feature_index_) + sizeof(is_sparse_) +
      bin_mapper_->SizesInByte() + bin_data_->SizesInByte();
  }
  /*! \brief Disable copy */
  Feature& operator=(const Feature&) = delete;
  /*! \brief Disable copy */
  Feature(const Feature&) = delete;

private:
  /*! \brief Index of this feature */
  int feature_index_;
  /*! \brief Bin mapper that this feature used */
  BinMapper* bin_mapper_;
  /*! \brief Bin data of this feature */
  Bin* bin_data_;
  /*! \brief True if this feature is sparse */
  bool is_sparse_;
};


}  // namespace LightGBM

#endif  // LIGHTGBM_FEATURE_H_