predictor.hpp 5.15 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_PREDICTOR_HPP_
#define LIGHTGBM_PREDICTOR_HPP_

#include <LightGBM/meta.h>
#include <LightGBM/boosting.h>
#include <LightGBM/utils/text_reader.h>
#include <LightGBM/dataset.h>

9
#include <LightGBM/utils/openmp_wrapper.h>
Guolin Ke's avatar
Guolin Ke committed
10
11
12
13
14
15
16

#include <cstring>
#include <cstdio>
#include <vector>
#include <utility>
#include <functional>
#include <string>
Guolin Ke's avatar
Guolin Ke committed
17
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
18
19
20
21

namespace LightGBM {

/*!
zhangyafeikimi's avatar
zhangyafeikimi committed
22
* \brief Used to predict data with input model
Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
27
28
*/
class Predictor {
public:
  /*!
  * \brief Constructor
  * \param boosting Input boosting model
Guolin Ke's avatar
Guolin Ke committed
29
  * \param num_iteration Number of boosting round
30
  * \param is_raw_score True if need to predict result with raw score
zhangyafeikimi's avatar
zhangyafeikimi committed
31
  * \param is_predict_leaf_index True if output leaf index instead of prediction score
Guolin Ke's avatar
Guolin Ke committed
32
  */
Guolin Ke's avatar
Guolin Ke committed
33
34
35
36
  Predictor(Boosting* boosting, int num_iteration,
            bool is_raw_score, bool is_predict_leaf_index) {

    feature_mapper_ = boosting->InitPredict(num_iteration);
Guolin Ke's avatar
Guolin Ke committed
37
    boosting_ = boosting;
Guolin Ke's avatar
Guolin Ke committed
38
39
40
41
42
43
    num_pred_one_row_ = boosting_->NumPredictOneRow(num_iteration, is_predict_leaf_index);

    num_total_features_ = static_cast<int>(feature_mapper_.size());
    num_used_features_ = 1;
    for (auto fidx : feature_mapper_) {
      num_used_features_ = std::max(num_used_features_, fidx + 1);
Guolin Ke's avatar
Guolin Ke committed
44
    }
Guolin Ke's avatar
Guolin Ke committed
45
46

    features_ = std::vector<double>(num_used_features_);
Guolin Ke's avatar
Guolin Ke committed
47
    if (is_predict_leaf_index) {
Guolin Ke's avatar
Guolin Ke committed
48
49
      predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
        PutFeatureValuesToBuffer(features);
Guolin Ke's avatar
Guolin Ke committed
50
        // get result for leaf index
Guolin Ke's avatar
Guolin Ke committed
51
        boosting_->PredictLeafIndex(features_.data(), output);
Guolin Ke's avatar
Guolin Ke committed
52
      };
Guolin Ke's avatar
Guolin Ke committed
53

Guolin Ke's avatar
Guolin Ke committed
54
    } else {
Guolin Ke's avatar
Guolin Ke committed
55
      if (is_raw_score) {
Guolin Ke's avatar
Guolin Ke committed
56
57
        predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
          PutFeatureValuesToBuffer(features);
Guolin Ke's avatar
Guolin Ke committed
58
          // get result without sigmoid transformation
Guolin Ke's avatar
Guolin Ke committed
59
          boosting_->PredictRaw(features_.data(), output);
Guolin Ke's avatar
Guolin Ke committed
60
61
        };
      } else {
Guolin Ke's avatar
Guolin Ke committed
62
63
64
        predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
          PutFeatureValuesToBuffer(features);
          boosting_->Predict(features_.data(), output);
Guolin Ke's avatar
Guolin Ke committed
65
66
67
        };
      }
    }
Guolin Ke's avatar
Guolin Ke committed
68
69
70
71
72
73
74
  }
  /*!
  * \brief Destructor
  */
  ~Predictor() {
  }

zhangyafeikimi's avatar
zhangyafeikimi committed
75
  inline const PredictFunction& GetPredictFunction() const {
Guolin Ke's avatar
Guolin Ke committed
76
    return predict_fun_;
77
  }
78

Guolin Ke's avatar
Guolin Ke committed
79
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
80
  * \brief predicting on data, then saving result to disk
Guolin Ke's avatar
Guolin Ke committed
81
82
83
  * \param data_filename Filename of data
  * \param result_filename Filename of output result
  */
Guolin Ke's avatar
Guolin Ke committed
84
  void Predict(const char* data_filename, const char* result_filename, bool has_header) {
Guolin Ke's avatar
Guolin Ke committed
85
86
    FILE* result_file;

Guolin Ke's avatar
Guolin Ke committed
87
    #ifdef _MSC_VER
Guolin Ke's avatar
Guolin Ke committed
88
    fopen_s(&result_file, result_filename, "w");
Guolin Ke's avatar
Guolin Ke committed
89
    #else
Guolin Ke's avatar
Guolin Ke committed
90
    result_file = fopen(result_filename, "w");
Guolin Ke's avatar
Guolin Ke committed
91
    #endif
Guolin Ke's avatar
Guolin Ke committed
92
93

    if (result_file == NULL) {
94
      Log::Fatal("Prediction results file %s doesn't exist", data_filename);
Guolin Ke's avatar
Guolin Ke committed
95
    }
Guolin Ke's avatar
Guolin Ke committed
96
    auto parser = std::unique_ptr<Parser>(Parser::CreateParser(data_filename, has_header, num_used_features_, boosting_->LabelIdx()));
Guolin Ke's avatar
Guolin Ke committed
97
98

    if (parser == nullptr) {
99
      Log::Fatal("Could not recognize the data format of data file %s", data_filename);
Guolin Ke's avatar
Guolin Ke committed
100
101
102
    }

    // function for parse data
103
104
    std::function<void(const char*, std::vector<std::pair<int, double>>*)> parser_fun;
    double tmp_label;
Guolin Ke's avatar
Guolin Ke committed
105
    parser_fun = [this, &parser, &tmp_label]
106
    (const char* buffer, std::vector<std::pair<int, double>>* feature) {
Guolin Ke's avatar
Guolin Ke committed
107
108
109
      parser->ParseOneLine(buffer, feature, &tmp_label);
    };

Guolin Ke's avatar
Guolin Ke committed
110
    std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
Guolin Ke's avatar
Guolin Ke committed
111
      [this, &parser_fun, &result_file]
Guolin Ke's avatar
Guolin Ke committed
112
    (data_size_t, const std::vector<std::string>& lines) {
113
      std::vector<std::pair<int, double>> oneline_features;
114
      for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
Guolin Ke's avatar
Guolin Ke committed
115
116
117
118
        oneline_features.clear();
        // parser
        parser_fun(lines[i].c_str(), &oneline_features);
        // predict
Guolin Ke's avatar
Guolin Ke committed
119
120
121
122
        std::vector<double> result(num_pred_one_row_);
        predict_fun_(oneline_features, result.data());
        auto str_result = Common::Join<double>(result, "\t");
        fprintf(result_file, "%s\n", str_result.c_str());
Guolin Ke's avatar
Guolin Ke committed
123
124
      }
    };
Guolin Ke's avatar
Guolin Ke committed
125
    TextReader<data_size_t> predict_data_reader(data_filename, has_header);
Guolin Ke's avatar
Guolin Ke committed
126
127
128
129
130
    predict_data_reader.ReadAllAndProcessParallel(process_fun);
    fclose(result_file);
  }

private:
Guolin Ke's avatar
Guolin Ke committed
131
132
  void PutFeatureValuesToBuffer(const std::vector<std::pair<int, double>>& features) {
    std::memset(features_.data(), 0, sizeof(double)*num_used_features_);
Guolin Ke's avatar
Guolin Ke committed
133
    // put feature value
Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
138
139
140
    int loop_size = static_cast<int>(features.size());
    #pragma omp parallel for schedule(static, 512) if(loop_size >= 1024) 
    for (int i = 0; i < loop_size; ++i) {
      if (features[i].first >= num_total_features_) continue;
      auto fidx = feature_mapper_[features[i].first];
      if (fidx >= 0) {
        features_[fidx] = features[i].second;
Guolin Ke's avatar
Guolin Ke committed
141
142
143
      }
    }
  }
Guolin Ke's avatar
Guolin Ke committed
144
145
146
  /*! \brief Boosting model */
  const Boosting* boosting_;
  /*! \brief Buffer for feature values */
Guolin Ke's avatar
Guolin Ke committed
147
  std::vector<double> features_;
Guolin Ke's avatar
Guolin Ke committed
148
  /*! \brief Number of features */
Guolin Ke's avatar
Guolin Ke committed
149
  int num_used_features_;
Guolin Ke's avatar
Guolin Ke committed
150
151
  /*! \brief function for prediction */
  PredictFunction predict_fun_;
Guolin Ke's avatar
Guolin Ke committed
152
153
154
  int num_pred_one_row_;
  std::vector<int> feature_mapper_;
  int num_total_features_;
Guolin Ke's avatar
Guolin Ke committed
155
156
157
158
};

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
159
#endif   // LIGHTGBM_PREDICTOR_HPP_