common.h 8.61 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include <LightGBM/utils/log.h>

#include <cstdio>
#include <string>
#include <vector>
#include <sstream>
#include <cstdint>
11
#include <algorithm>
12
#include <cmath>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

namespace LightGBM {

namespace Common {

/*!
 * \brief Returns a copy of the larger of two values.
 * \param a First value.
 * \param b Second value; also the result when a is not strictly greater.
 * \return a if a > b, otherwise b.
 */
template<typename T>
inline static T Max(const T& a, const T& b) {
  if (a > b) {
    return a;
  }
  return b;
}

/*!
 * \brief Returns a copy of the smaller of two values.
 * \param a First value.
 * \param b Second value; also the result when a is not strictly smaller.
 * \return a if a < b, otherwise b.
 */
template<typename T>
inline static T Min(const T& a, const T& b) {
  if (a < b) {
    return a;
  }
  return b;
}



/*!
 * \brief Strips leading and trailing whitespace (" \f\n\r\t\v") from str in place.
 * \param str String to trim; modified directly.
 * \return Reference to the same (now trimmed) string.
 */
inline static std::string& Trim(std::string& str) {
  const char* const blanks = " \f\n\r\t\v";
  if (!str.empty()) {
    // Cut the tail first; if the string is all blanks, npos + 1 wraps to 0
    // and the whole string is erased.
    const size_t last_kept = str.find_last_not_of(blanks);
    str.erase(last_kept + 1);
    const size_t first_kept = str.find_first_not_of(blanks);
    str.erase(0, first_kept);
  }
  return str;
}

39
40
41
42
43
44
45
46
/*!
 * \brief Strips leading and trailing single/double quote characters from str in place.
 * \param str String to unquote; modified directly.
 * \return Reference to the same string. Quotes in the middle are kept.
 */
inline static std::string& RemoveQuotationSymbol(std::string& str) {
  const char* const quotes = "'\"";
  if (!str.empty()) {
    // Cut the tail first; if the string is all quotes, npos + 1 wraps to 0
    // and the whole string is erased.
    const size_t last_kept = str.find_last_not_of(quotes);
    str.erase(last_kept + 1);
    const size_t first_kept = str.find_first_not_of(quotes);
    str.erase(0, first_kept);
  }
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
47
48
49
50
51
52
53
/*!
 * \brief Checks whether str begins with prefix.
 * \param str String to inspect.
 * \param prefix Prefix to look for; an empty prefix always matches.
 * \return true if the first prefix.size() characters of str equal prefix.
 */
inline static bool StartsWith(const std::string& str, const std::string& prefix) {
  // compare() clamps the range to str's length, so a prefix longer than str
  // simply compares unequal. No temporary substring is allocated.
  return str.compare(0, prefix.size(), prefix) == 0;
}
Guolin Ke's avatar
Guolin Ke committed
54
/*!
 * \brief Splits a C string on every occurrence of a single delimiter character.
 * \param c_str Null-terminated input string.
 * \param delimiter Character to split on.
 * \return The pieces in order. Empty pieces are kept, so the result always has
 *         (number of delimiters + 1) elements; an empty input yields one empty string.
 */
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = str.find(delimiter);
  while (pos != std::string::npos) {
    ret.push_back(str.substr(i, pos - i));
    // Continue searching right after the delimiter just consumed.
    i = ++pos;
    pos = str.find(delimiter, pos);
  }
  // Whatever follows the last delimiter (possibly empty) is the final piece.
  ret.push_back(str.substr(i));
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*!
 * \brief Splits a C string wherever any character from delimiters occurs.
 * \param c_str Null-terminated input string.
 * \param delimiters Null-terminated set of delimiter characters.
 * \return The pieces in order. Empty pieces (e.g. between two adjacent
 *         delimiters) are kept; an empty input yields one empty string.
 */
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  const std::string text(c_str);
  std::vector<std::string> tokens;
  size_t begin = 0;
  for (size_t hit = text.find_first_of(delimiters);
       hit != std::string::npos;
       hit = text.find_first_of(delimiters, begin)) {
    tokens.push_back(text.substr(begin, hit - begin));
    // Resume just past the delimiter that ended this token.
    begin = hit + 1;
  }
  // Trailing remainder after the last delimiter.
  tokens.push_back(text.substr(begin));
  return tokens;
}

Guolin Ke's avatar
Guolin Ke committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*!
 * \brief Parses a decimal integer from the start of p.
 *
 * Skips leading spaces, reads an optional '+'/'-' sign and a run of decimal
 * digits, then skips trailing spaces. No error is reported: with no digits
 * present *out is set to 0.
 *
 * \param p Null-terminated input.
 * \param out Receives the parsed value. Values outside the range of int wrap
 *            around (modulo 2^N) instead of triggering signed-overflow
 *            undefined behavior.
 * \return Pointer to the first character after the number and trailing spaces.
 */
inline static const char* Atoi(const char* p, int* out) {
  while (*p == ' ') {
    ++p;
  }
  bool negative = false;
  if (*p == '-') {
    negative = true;
    ++p;
  } else if (*p == '+') {
    ++p;
  }
  // Accumulate as unsigned: wrap-around is well defined there, and the most
  // negative int can be represented as a magnitude before the sign is applied.
  unsigned int magnitude = 0;
  for (; *p >= '0' && *p <= '9'; ++p) {
    magnitude = magnitude * 10u + static_cast<unsigned int>(*p - '0');
  }
  *out = static_cast<int>(negative ? 0u - magnitude : magnitude);
  while (*p == ' ') {
    ++p;
  }
  return p;
}

//ref to http://www.leapsecond.com/tools/fast_atof.c
107
inline static const char* Atof(const char* p, float* out) {
Guolin Ke's avatar
Guolin Ke committed
108
  int frac;
109
  float sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
110
  *out = 0;
Guolin Ke's avatar
Guolin Ke committed
111
112
113
114
115
116
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }

  // Get sign, if any.
117
  sign = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
118
  if (*p == '-') {
119
    sign = -1.0f;
Guolin Ke's avatar
Guolin Ke committed
120
121
122
123
124
125
    ++p;
  }
  else if (*p == '+') {
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
126
127
128
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
129
130
    for (value = 0.0f; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0f + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
131
    }
Guolin Ke's avatar
Guolin Ke committed
132

Guolin Ke's avatar
Guolin Ke committed
133
134
    // Get digits after decimal point, if any.
    if (*p == '.') {
135
      float pow10 = 10.0f;
Guolin Ke's avatar
Guolin Ke committed
136
      ++p;
Guolin Ke's avatar
Guolin Ke committed
137
138
      while (*p >= '0' && *p <= '9') {
        value += (*p - '0') / pow10;
139
        pow10 *= 10.0f;
Guolin Ke's avatar
Guolin Ke committed
140
141
        ++p;
      }
Guolin Ke's avatar
Guolin Ke committed
142
143
    }

Guolin Ke's avatar
Guolin Ke committed
144
145
    // Handle exponent, if any.
    frac = 0;
146
    scale = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
147
148
149
    if ((*p == 'e') || (*p == 'E')) {
      unsigned int expon;
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
150
      ++p;
Guolin Ke's avatar
Guolin Ke committed
151
152
153
154
155
156
157
158
159
160
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
161
      if (expon > 38) expon = 38;
Guolin Ke's avatar
Guolin Ke committed
162
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
163
      while (expon > 0) { scale *= 10.0f; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
164
    }
Guolin Ke's avatar
Guolin Ke committed
165
166
167
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
168
    size_t cnt = 0;
169
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
170
171
172
173
174
      && *(p + cnt) != '\t' && *(p + cnt) != ','
      && *(p + cnt) != '\n' && *(p + cnt) != '\r'
      && *(p + cnt) != ':')  {
      ++cnt;
    }
Guolin Ke's avatar
Guolin Ke committed
175
176
177
178
    if(cnt > 0){
      std::string tmp_str(p, cnt);
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
      if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
179
        *out = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
180
      } else if( tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
181
        *out = sign * static_cast<float>(1e38);
Guolin Ke's avatar
Guolin Ke committed
182
183
      }
      else {
Qiwei Ye's avatar
Qiwei Ye committed
184
        Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
185
186
      }
      p += cnt;
187
    }
Guolin Ke's avatar
Guolin Ke committed
188
  }
Guolin Ke's avatar
Guolin Ke committed
189

Guolin Ke's avatar
Guolin Ke committed
190
191
192
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
193

Guolin Ke's avatar
Guolin Ke committed
194
195
196
  return p;
}

197
198
199
200
201
202
203
204
/*!
 * \brief Parses an int with Atoi and reports whether the whole string was consumed.
 * \param p Null-terminated input.
 * \param out Receives the value parsed by Atoi.
 * \return true if Atoi stopped at the terminating '\0', false if other characters remain.
 */
inline bool AtoiAndCheck(const char* p, int* out) {
  const char* const rest = Atoi(p, out);
  return *rest == '\0';
}

205
inline bool AtofAndCheck(const char* p, float* out) {
206
207
208
209
210
211
212
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

Guolin Ke's avatar
Guolin Ke committed
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*!
 * \brief Advances past any run of spaces and tabs.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is neither ' ' nor '\t'.
 */
inline static const char* SkipSpaceAndTab(const char* p) {
  for (;;) {
    const char c = *p;
    if (c != ' ' && c != '\t') {
      return p;
    }
    ++p;
  }
}

/*!
 * \brief Advances past any run of line breaks and spaces.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is not '\n', '\r' or ' '.
 */
inline static const char* SkipReturn(const char* p) {
  for (;;) {
    const char c = *p;
    if (c != '\n' && c != '\r' && c != ' ') {
      return p;
    }
    ++p;
  }
}

/*!
 * \brief Formats the first n elements of an array as a delimiter-separated string.
 * \param arr Pointer to the elements; each is written with operator<< on a
 *            default-configured std::stringstream.
 * \param n Number of elements to write.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text, or an empty string when n <= 0.
 */
template<typename T>
inline static std::string ArrayToString(const T* arr, int n, char delimiter) {
  if (n <= 0) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << arr[0];
  for (int i = 1; i < n; ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

241
242
243
244
/*!
 * \brief Formats the elements of a vector as a delimiter-separated string.
 * \param arr Elements to write; each is written with operator<< on a
 *            default-configured std::stringstream. Taken by const reference
 *            so the vector is not copied.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text, or an empty string when arr is empty.
 */
template<typename T>
inline static std::string ArrayToString(const std::vector<T>& arr, char delimiter) {
  if (arr.empty()) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << arr[0];
  for (size_t i = 1; i < arr.size(); ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

255
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
Guolin Ke's avatar
Guolin Ke committed
256
257
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
258
    Log::Fatal("StringToIntArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
259
260
261
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
262
    Atoi(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  }
}

266
inline static void StringToFloatArray(const std::string& str, char delimiter, size_t n, float* out) {
Guolin Ke's avatar
Guolin Ke committed
267
268
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
269
    Log::Fatal("StringToFloatArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
270
271
272
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
273
    Atof(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
274
275
276
  }
}

277
inline static std::vector<float> StringToFloatArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
278
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
279
  std::vector<float> ret;
Guolin Ke's avatar
Guolin Ke committed
280
281
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
282
    float val = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
    Atof(strs[i].c_str(), &val);
    ret.push_back(val);
  }
  return ret;
}

/*!
 * \brief Parses all delimiter-separated integers in str.
 * \param str Text to parse.
 * \param delimiter Character separating the values.
 * \return One int per piece produced by Split, in order. Each piece is
 *         trimmed and parsed with Atoi.
 */
inline static std::vector<int> StringToIntArray(const std::string& str, char delimiter) {
  std::vector<std::string> pieces = Split(str.c_str(), delimiter);
  std::vector<int> values;
  for (std::string& piece : pieces) {
    // Trim mutates the piece in place.
    Trim(piece);
    int parsed = 0;
    Atoi(piece.c_str(), &parsed);
    values.push_back(parsed);
  }
  return values;
}

/*!
 * \brief Concatenates all strings, placing delimiter between consecutive elements.
 * \param strs Strings to join.
 * \param delimiter Separator character.
 * \return The joined text, or an empty string when strs is empty.
 */
inline static std::string Join(const std::vector<std::string>& strs, char delimiter) {
  std::string joined;
  for (size_t idx = 0; idx < strs.size(); ++idx) {
    // Separator goes before every element except the first.
    if (idx != 0) {
      joined.push_back(delimiter);
    }
    joined.append(strs[idx]);
  }
  return joined;
}

/*!
 * \brief Joins the elements strs[start, end) with delimiter between them.
 * \param strs Strings to join.
 * \param start Index of the first element to include. Values past the last
 *              element are clamped to the last element.
 * \param end One past the last element to include. Clamped to strs.size().
 * \param delimiter Separator character.
 * \return The joined text; an empty string when strs is empty or start >= end.
 */
inline static std::string Join(const std::vector<std::string>& strs, size_t start, size_t end, char delimiter) {
  // start/end are unsigned, so compare them directly: "end - start <= 0"
  // would wrap around for start > end. An empty vector has nothing to join
  // and must not reach the size() - 1 computation below.
  if (strs.empty() || start >= end) {
    return std::string("");
  }
  start = std::min(start, strs.size() - 1);
  end = std::min(end, strs.size());
  std::stringstream ss;
  ss << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    ss << delimiter;
    ss << strs[i];
  }
  return ss.str();
}

/*!
 * \brief Rounds x up to the nearest power of two.
 * \param x Value to round.
 * \return The smallest power of two >= x (1 for any x <= 1), or 0 when no
 *         power of two representable in int64_t is large enough (x > 2^62).
 */
static inline int64_t Pow2RoundUp(int64_t x) {
  // 2^62 is the largest power of two that fits in int64_t. Rejecting larger
  // inputs up front keeps the shift below from overflowing a signed value.
  const int64_t max_pow2 = static_cast<int64_t>(1) << 62;
  if (x > max_pow2) {
    return 0;
  }
  int64_t t = 1;
  while (t < x) {
    t <<= 1;
  }
  return t;
}

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/*!
 * \brief Do inplace softmax transformation on p_rec
 *
 * The maximum element is subtracted before exponentiation so that the largest
 * exponent is exp(0) = 1 and the exponentials cannot overflow. An empty vector
 * is left untouched.
 *
 * \param p_rec The input/output vector of the values.
 */
inline void Softmax(std::vector<float>* p_rec) {
  std::vector<float> &rec = *p_rec;
  if (rec.empty()) {
    // Nothing to normalize; also avoids reading rec[0] out of bounds.
    return;
  }
  float wmax = rec[0];
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
  float wsum = 0.0f;
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] /= wsum;
  }
}

Guolin Ke's avatar
Guolin Ke committed
360
361
362
363
}  // namespace Common

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
364
#endif   // LIGHTGBM_UTILS_COMMON_FUN_H_