parser.hpp 2.88 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#ifndef LIGHTGBM_IO_PARSER_HPP_
#define LIGHTGBM_IO_PARSER_HPP_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

#include <LightGBM/dataset.h>

#include <cmath>
#include <unordered_map>
#include <utility>
#include <vector>

namespace LightGBM {

class CSVParser: public Parser {
public:
  inline void ParseOneLine(const char* str,
Guolin Ke's avatar
Guolin Ke committed
18
    std::vector<std::pair<int, double>>* out_features) const override {
Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atof(str, &val);
Guolin Ke's avatar
Guolin Ke committed
23
24
25
      if (fabs(val) > 1e-10) {
        out_features->emplace_back(idx, val);
      }
Guolin Ke's avatar
Guolin Ke committed
26
27
28
29
      ++idx;
      if (*str == ',') {
        ++str;
      } else if (*str != '\0') {
Qiwei Ye's avatar
Qiwei Ye committed
30
        Log::Fatal("input format error, should be CSV");
Guolin Ke's avatar
Guolin Ke committed
31
32
33
34
      }
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
35
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
36
37
    // first column is label
    str = Common::Atof(str, out_label);
Guolin Ke's avatar
Guolin Ke committed
38
39
40
    if (*str == ',') {
      ++str;
    } else if (*str != '\0') {
Qiwei Ye's avatar
Qiwei Ye committed
41
      Log::Fatal("input format error, should be CSV");
Guolin Ke's avatar
Guolin Ke committed
42
    }
Guolin Ke's avatar
Guolin Ke committed
43
44
45
46
47
48
49
50
51
52
53
    return ParseOneLine(str, out_features);
  }
};

class TSVParser: public Parser {
public:
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atof(str, &val);
Guolin Ke's avatar
Guolin Ke committed
54
55
56
      if (fabs(val) > 1e-10) {
        out_features->emplace_back(idx, val);
      }
Guolin Ke's avatar
Guolin Ke committed
57
58
59
60
      ++idx;
      if (*str == '\t') {
        ++str;
      } else if (*str != '\0') {
Qiwei Ye's avatar
Qiwei Ye committed
61
        Log::Fatal("input format error, should be TSV");
Guolin Ke's avatar
Guolin Ke committed
62
63
64
65
      }
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
66
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
67
68
    // first column is label
    str = Common::Atof(str, out_label);
Guolin Ke's avatar
Guolin Ke committed
69
70
71
    if (*str == '\t') {
      ++str;
    } else if (*str != '\0') {
Qiwei Ye's avatar
Qiwei Ye committed
72
      Log::Fatal("input format error, should be TSV");
Guolin Ke's avatar
Guolin Ke committed
73
    }
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    return ParseOneLine(str, out_features);
  }
};

class LibSVMParser: public Parser {
public:
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atoi(str, &idx);
      str = Common::SkipSpaceAndTab(str);
      if (*str == ':') {
        ++str;
        str = Common::Atof(str, &val);
        out_features->emplace_back(idx, val);
      } else {
Qiwei Ye's avatar
Qiwei Ye committed
91
        Log::Fatal("input format error, should be LibSVM");
Guolin Ke's avatar
Guolin Ke committed
92
93
94
95
96
      }
      str = Common::SkipSpaceAndTab(str);
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
97
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
98
99
100
101
102
103
104
    // first column is label
    str = Common::Atof(str, out_label);
    str = Common::SkipSpaceAndTab(str);
    return ParseOneLine(str, out_features);
  }
};
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
105
#endif   // LIGHTGBM_IO_PARSER_HPP_