#ifndef LIGHTGBM_IO_PARSER_HPP_ #define LIGHTGBM_IO_PARSER_HPP_ #include #include #include #include #include #include namespace LightGBM { class CSVParser: public Parser { public: explicit CSVParser(int label_idx) :label_idx_(label_idx) { } inline void ParseOneLine(const char* str, std::vector>* out_features, double* out_label) const override { int idx = 0; double val = 0.0f; int bias = 0; *out_label = 0.0f; while (*str != '\0') { str = Common::Atof(str, &val); if (idx == label_idx_) { *out_label = val; bias = -1; } else if (std::fabs(val) > kEpsilon || std::isnan(val)) { out_features->emplace_back(idx + bias, val); } ++idx; if (*str == ',') { ++str; } else if (*str != '\0') { Log::Fatal("Input format error when parsing as CSV"); } } } private: int label_idx_ = 0; }; class TSVParser: public Parser { public: explicit TSVParser(int label_idx) :label_idx_(label_idx) { } inline void ParseOneLine(const char* str, std::vector>* out_features, double* out_label) const override { int idx = 0; double val = 0.0f; int bias = 0; while (*str != '\0') { str = Common::Atof(str, &val); if (idx == label_idx_) { *out_label = val; bias = -1; } else if (std::fabs(val) > kEpsilon || std::isnan(val)) { out_features->emplace_back(idx + bias, val); } ++idx; if (*str == '\t') { ++str; } else if (*str != '\0') { Log::Fatal("Input format error when parsing as TSV"); } } } private: int label_idx_ = 0; }; class LibSVMParser: public Parser { public: explicit LibSVMParser(int label_idx) :label_idx_(label_idx) { if (label_idx > 0) { Log::Fatal("Label should be the first column in a LibSVM file"); } } inline void ParseOneLine(const char* str, std::vector>* out_features, double* out_label) const override { int idx = 0; double val = 0.0f; if (label_idx_ == 0) { str = Common::Atof(str, &val); *out_label = val; str = Common::SkipSpaceAndTab(str); } while (*str != '\0') { str = Common::Atoi(str, &idx); str = Common::SkipSpaceAndTab(str); if (*str == ':') { ++str; str = Common::Atof(str, &val); out_features->emplace_back(idx, val); } else { Log::Fatal("Input format error when parsing as LibSVM"); } str = Common::SkipSpaceAndTab(str); } } private: int label_idx_ = 0; }; } // namespace LightGBM #endif // LightGBM_IO_PARSER_HPP_