parser.hpp 2.96 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#ifndef LIGHTGBM_IO_PARSER_HPP_
#define LIGHTGBM_IO_PARSER_HPP_

#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>

#include <LightGBM/dataset.h>

#include <unordered_map>
#include <vector>
#include <utility>

namespace LightGBM {

/*!
 * \brief Parser for lines of comma-separated numeric values.
 */
class CSVParser: public Parser {
public:
  /*!
   * \brief Parse a line that holds only feature values.
   *        Every parsed column is appended to out_features as (column, value);
   *        columns are numbered from 0 in the order they appear and no value
   *        is filtered out.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per parsed column
   */
  inline void ParseOneLine(const char* str,
                           std::vector<std::pair<int, double>>* out_features) const override {
    double parsed = 0.0;
    for (int column = 0; *str != '\0'; ++column) {
      // Common::Atof hands back the position at which reading continues.
      str = Common::Atof(str, &parsed);
      out_features->emplace_back(column, parsed);
      switch (*str) {
        case ',':
          ++str;  // step over the separator
          break;
        case '\0':
          break;  // end of line; the loop condition stops the scan
        default:
          // NOTE(review): str is not advanced on this path, so termination
          // presumably relies on Log::Stderr not returning - confirm in log.h.
          Log::Stderr("input format error, should be CSV");
          break;
      }
    }
  }

  /*!
   * \brief Parse a line whose first column is the label.
   *        The remaining columns are handled by the feature-only overload,
   *        so the column right after the label gets feature index 0.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per feature column
   * \param out_label Receives the value of the first column
   */
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
                                                           double* out_label) const override {
    const char* rest = Common::Atof(str, out_label);
    switch (*rest) {
      case ',':
        ++rest;  // step over the separator that follows the label
        break;
      case '\0':
        break;  // line holds a label only
      default:
        Log::Stderr("input format error, should be CSV");
        break;
    }
    ParseOneLine(rest, out_features);
  }
};

/*!
 * \brief Parser for lines of tab-separated numeric values.
 */
class TSVParser: public Parser {
public:
  /*!
   * \brief Parse a line that holds only feature values.
   *        Every parsed column is appended to out_features as (column, value);
   *        columns are numbered from 0 in the order they appear and no value
   *        is filtered out.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per parsed column
   */
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    double parsed = 0.0;
    for (int column = 0; *str != '\0'; ++column) {
      // Common::Atof hands back the position at which reading continues.
      str = Common::Atof(str, &parsed);
      out_features->emplace_back(column, parsed);
      switch (*str) {
        case '\t':
          ++str;  // step over the separator
          break;
        case '\0':
          break;  // end of line; the loop condition stops the scan
        default:
          // NOTE(review): str is not advanced on this path, so termination
          // presumably relies on Log::Stderr not returning - confirm in log.h.
          Log::Stderr("input format error, should be TSV");
          break;
      }
    }
  }

  /*!
   * \brief Parse a line whose first column is the label.
   *        The remaining columns are handled by the feature-only overload,
   *        so the column right after the label gets feature index 0.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per feature column
   * \param out_label Receives the value of the first column
   */
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
                                                           double* out_label) const override{
    const char* rest = Common::Atof(str, out_label);
    switch (*rest) {
      case '\t':
        ++rest;  // step over the separator that follows the label
        break;
      case '\0':
        break;  // line holds a label only
      default:
        Log::Stderr("input format error, should be TSV");
        break;
    }
    ParseOneLine(rest, out_features);
  }
};

/*!
 * \brief Parser for lines made of "index:value" entries.
 */
class LibSVMParser: public Parser {
public:
  /*!
   * \brief Parse a line that holds only "index:value" entries.
   *        Each entry is appended to out_features as (index, value), using the
   *        index exactly as it is read from the text.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per entry
   */
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features) const override {
    int feature_index = 0;
    double feature_value = 0.0;
    // The iteration step skips the blanks that follow each entry, whether or
    // not the entry was well formed.
    for (; *str != '\0'; str = Common::SkipSpaceAndTab(str)) {
      str = Common::Atoi(str, &feature_index);
      str = Common::SkipSpaceAndTab(str);
      if (*str != ':') {
        // NOTE(review): nothing on this path is guaranteed to advance str, so
        // termination presumably relies on Log::Stderr not returning - confirm.
        Log::Stderr("input format error, should be LibSVM");
        continue;
      }
      // Read the value that starts right after the ':'.
      str = Common::Atof(str + 1, &feature_value);
      out_features->emplace_back(feature_index, feature_value);
    }
  }

  /*!
   * \brief Parse a line whose first token is the label, followed by
   *        "index:value" entries.
   * \param str Null-terminated text of one line
   * \param out_features Receives one (index, value) pair per entry
   * \param out_label Receives the value of the first token
   */
  inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
                                                            double* out_label) const override{
    const char* entries = Common::SkipSpaceAndTab(Common::Atof(str, out_label));
    ParseOneLine(entries, out_features);
  }
};
}  // namespace LightGBM
#endif  // LIGHTGBM_IO_PARSER_HPP_