"include/LightGBM/vscode:/vscode.git/clone" did not exist on "4cf9376d6652d3d7afa82e98dfb363af9275969d"
parser.hpp 2.88 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#ifndef LIGHTGBM_IO_PARSER_HPP_
#define LIGHTGBM_IO_PARSER_HPP_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

#include <LightGBM/dataset.h>

#include <cmath>
#include <unordered_map>
#include <utility>
#include <vector>

namespace LightGBM {

class CSVParser: public Parser {
public:
  inline void ParseOneLine(const char* str,
Guolin Ke's avatar
Guolin Ke committed
18
    std::vector<std::pair<int, double>>* out_features) const override {
Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atof(str, &val);
Guolin Ke's avatar
Guolin Ke committed
23
24
25
      if (fabs(val) > 1e-10) {
        out_features->emplace_back(idx, val);
      }
Guolin Ke's avatar
Guolin Ke committed
26
27
28
29
30
31
32
33
34
      ++idx;
      if (*str == ',') {
        ++str;
      } else if (*str != '\0') {
        Log::Stderr("input format error, should be CSV");
      }
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
35
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
36
37
    // first column is label
    str = Common::Atof(str, out_label);
Guolin Ke's avatar
Guolin Ke committed
38
39
40
41
42
    if (*str == ',') {
      ++str;
    } else if (*str != '\0') {
      Log::Stderr("input format error, should be CSV");
    }
Guolin Ke's avatar
Guolin Ke committed
43
44
45
46
47
48
49
50
51
52
53
    return ParseOneLine(str, out_features);
  }
};

class TSVParser: public Parser {
public:
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atof(str, &val);
Guolin Ke's avatar
Guolin Ke committed
54
55
56
      if (fabs(val) > 1e-10) {
        out_features->emplace_back(idx, val);
      }
Guolin Ke's avatar
Guolin Ke committed
57
58
59
60
61
62
63
64
65
      ++idx;
      if (*str == '\t') {
        ++str;
      } else if (*str != '\0') {
        Log::Stderr("input format error, should be TSV");
      }
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
66
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
67
68
    // first column is label
    str = Common::Atof(str, out_label);
Guolin Ke's avatar
Guolin Ke committed
69
70
71
72
73
    if (*str == '\t') {
      ++str;
    } else if (*str != '\0') {
      Log::Stderr("input format error, should be TSV");
    }
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
    return ParseOneLine(str, out_features);
  }
};

class LibSVMParser: public Parser {
public:
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    int idx = 0;
    double val = 0.0;
    while (*str != '\0') {
      str = Common::Atoi(str, &idx);
      str = Common::SkipSpaceAndTab(str);
      if (*str == ':') {
        ++str;
        str = Common::Atof(str, &val);
        out_features->emplace_back(idx, val);
      } else {
        Log::Stderr("input format error, should be LibSVM");
      }
      str = Common::SkipSpaceAndTab(str);
    }
  }
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
Guolin Ke's avatar
Guolin Ke committed
97
    double* out_label) const override {
Guolin Ke's avatar
Guolin Ke committed
98
99
100
101
102
103
104
    // first column is label
    str = Common::Atof(str, out_label);
    str = Common::SkipSpaceAndTab(str);
    return ParseOneLine(str, out_features);
  }
};
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
105
#endif   // LIGHTGBM_IO_PARSER_HPP_