common.h 9.28 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include <LightGBM/utils/log.h>

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

namespace LightGBM {

namespace Common {

/*!
 * \brief Picks the larger of two values using operator>.
 * \param a First candidate.
 * \param b Second candidate; returned when a is not strictly greater.
 * \return A copy of the selected value.
 */
template<typename T>
inline static T Max(const T& a, const T& b) {
  if (a > b) {
    return a;
  }
  return b;
}

/*!
 * \brief Picks the smaller of two values using operator<.
 * \param a First candidate.
 * \param b Second candidate; returned when a is not strictly smaller.
 * \return A copy of the selected value.
 */
template<typename T>
inline static T Min(const T& a, const T& b) {
  if (a < b) {
    return a;
  }
  return b;
}



/*!
 * \brief Strips leading and trailing whitespace from a string in place.
 *        The characters removed are space, \f, \n, \r, \t and \v.
 * \param str The string to modify.
 * \return Reference to the same (now trimmed) string.
 */
inline static std::string& Trim(std::string& str) {
  const char* const blanks = " \f\n\r\t\v";
  if (str.empty()) {
    return str;
  }
  // When every character is blank the search yields npos, and npos + 1
  // wraps to 0, so the whole string is erased.
  const size_t last_kept = str.find_last_not_of(blanks);
  str.erase(last_kept + 1);
  const size_t first_kept = str.find_first_not_of(blanks);
  str.erase(0, first_kept);
  return str;
}

38
39
40
41
42
43
44
45
/*!
 * \brief Strips leading and trailing single/double quote characters from a
 *        string in place. Quotes in the middle of the string are kept.
 * \param str The string to modify.
 * \return Reference to the same (now unquoted) string.
 */
inline static std::string& RemoveQuotationSymbol(std::string& str) {
  const char* const quotes = "'\"";
  if (str.empty()) {
    return str;
  }
  // When the string holds only quotes the search yields npos, and npos + 1
  // wraps to 0, so the whole string is erased.
  const size_t last_kept = str.find_last_not_of(quotes);
  str.erase(last_kept + 1);
  const size_t first_kept = str.find_first_not_of(quotes);
  str.erase(0, first_kept);
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
46
47
48
49
50
51
52
/*!
 * \brief Checks whether a string begins with the given prefix.
 * \param str The string to inspect.
 * \param prefix The prefix to look for; an empty prefix always matches.
 * \return true if the first prefix.size() characters of str equal prefix.
 */
inline static bool StartsWith(const std::string& str, const std::string& prefix) {
  // compare() clamps the compared range to the end of str, so a prefix
  // longer than str simply fails to match. Unlike substr(), it does not
  // allocate a temporary string.
  return str.compare(0, prefix.size(), prefix) == 0;
}
Guolin Ke's avatar
Guolin Ke committed
53
/*!
 * \brief Splits a C string at every occurrence of a single delimiter.
 *        Empty fields are kept, so the result always holds one more
 *        element than the number of delimiters found.
 * \param c_str Null-terminated input string.
 * \param delimiter Character to split on.
 * \return The fields in order of appearance.
 */
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = str.find(delimiter);
  while (pos != std::string::npos) {
    ret.push_back(str.substr(i, pos - i));
    // Next field starts right after the delimiter just consumed.
    i = ++pos;
    pos = str.find(delimiter, pos);
  }
  // Whatever follows the last delimiter (possibly empty) is the final field.
  ret.push_back(str.substr(i));
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*!
 * \brief Splits a C string at every character that appears in a set of
 *        delimiters. Empty fields are kept.
 * \param c_str Null-terminated input string.
 * \param delimiters Null-terminated set of delimiter characters.
 * \return The fields in order of appearance.
 */
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  const std::string text(c_str);
  std::vector<std::string> fields;
  size_t field_begin = 0;
  for (size_t hit = text.find_first_of(delimiters);
       hit != std::string::npos;
       hit = text.find_first_of(delimiters, field_begin)) {
    fields.push_back(text.substr(field_begin, hit - field_begin));
    // Resume searching just past the delimiter that ended this field.
    field_begin = hit + 1;
  }
  // The remainder after the last delimiter is the final field.
  fields.push_back(text.substr(field_begin));
  return fields;
}

Guolin Ke's avatar
Guolin Ke committed
82
83
84
85
86
87
88
89
90
/*!
 * \brief Parses a decimal integer from the start of a C string.
 *        Leading spaces, an optional '+'/'-' sign and trailing spaces are
 *        consumed. If no digit is present, *out is set to 0.
 *        No overflow detection is performed.
 * \param p Null-terminated input.
 * \param out Receives the parsed value.
 * \return Pointer to the first character after the number and any spaces
 *         that follow it.
 */
inline static const char* Atoi(const char* p, int* out) {
  int sign, value;
  // Only the space character is skipped here, not tabs or newlines.
  while (*p == ' ') {
    ++p;
  }
  sign = 1;
  if (*p == '-') {
    sign = -1;
    ++p;
  } else if (*p == '+') {
    ++p;
  }
  for (value = 0; *p >= '0' && *p <= '9'; ++p) {
    value = value * 10 + (*p - '0');
  }
  *out = sign * value;
  while (*p == ' ') {
    ++p;
  }
  return p;
}

//ref to http://www.leapsecond.com/tools/fast_atof.c
105
inline static const char* Atof(const char* p, float* out) {
Guolin Ke's avatar
Guolin Ke committed
106
  int frac;
107
  float sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
108
  *out = 0;
Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }

  // Get sign, if any.
115
  sign = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
116
  if (*p == '-') {
117
    sign = -1.0f;
Guolin Ke's avatar
Guolin Ke committed
118
    ++p;
119
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
120
121
122
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
123
124
125
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
126
127
    for (value = 0.0f; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0f + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
128
    }
Guolin Ke's avatar
Guolin Ke committed
129

Guolin Ke's avatar
Guolin Ke committed
130
131
    // Get digits after decimal point, if any.
    if (*p == '.') {
132
      float pow10 = 10.0f;
Guolin Ke's avatar
Guolin Ke committed
133
      ++p;
Guolin Ke's avatar
Guolin Ke committed
134
135
      while (*p >= '0' && *p <= '9') {
        value += (*p - '0') / pow10;
136
        pow10 *= 10.0f;
Guolin Ke's avatar
Guolin Ke committed
137
138
        ++p;
      }
Guolin Ke's avatar
Guolin Ke committed
139
140
    }

Guolin Ke's avatar
Guolin Ke committed
141
142
    // Handle exponent, if any.
    frac = 0;
143
    scale = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
144
145
146
    if ((*p == 'e') || (*p == 'E')) {
      unsigned int expon;
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
147
      ++p;
Guolin Ke's avatar
Guolin Ke committed
148
149
150
151
152
153
154
155
156
157
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
158
      if (expon > 38) expon = 38;
Guolin Ke's avatar
Guolin Ke committed
159
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
160
      while (expon > 0) { scale *= 10.0f; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
161
    }
Guolin Ke's avatar
Guolin Ke committed
162
163
164
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
165
    size_t cnt = 0;
166
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
167
168
      && *(p + cnt) != '\t' && *(p + cnt) != ','
      && *(p + cnt) != '\n' && *(p + cnt) != '\r'
169
      && *(p + cnt) != ':') {
170
171
      ++cnt;
    }
172
    if (cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
173
174
175
      std::string tmp_str(p, cnt);
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
      if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
176
        *out = 0.0f;
177
      } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
178
        *out = sign * static_cast<float>(1e38);
179
      } else {
Qiwei Ye's avatar
Qiwei Ye committed
180
        Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
181
182
      }
      p += cnt;
183
    }
Guolin Ke's avatar
Guolin Ke committed
184
  }
Guolin Ke's avatar
Guolin Ke committed
185

Guolin Ke's avatar
Guolin Ke committed
186
187
188
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
189

Guolin Ke's avatar
Guolin Ke committed
190
191
192
  return p;
}

193
194
195
196
197
198
199
200
/*!
 * \brief Parses an integer with Atoi and reports whether the whole string
 *        was consumed.
 * \param p Null-terminated input.
 * \param out Receives the parsed value.
 * \return true if Atoi stopped at the terminating '\0', false if other
 *         characters remain.
 */
inline bool AtoiAndCheck(const char* p, int* out) {
  const char* const remainder = Atoi(p, out);
  return *remainder == '\0';
}

201
inline bool AtofAndCheck(const char* p, float* out) {
202
203
204
205
206
207
208
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

Guolin Ke's avatar
Guolin Ke committed
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/*!
 * \brief Advances past any run of spaces and tabs.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is neither ' ' nor '\t'.
 */
inline static const char* SkipSpaceAndTab(const char* p) {
  for (; *p == ' ' || *p == '\t'; ++p) {
  }
  return p;
}

/*!
 * \brief Advances past any run of newlines, carriage returns and spaces.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is not '\n', '\r' or ' '.
 */
inline static const char* SkipReturn(const char* p) {
  for (; *p == '\n' || *p == '\r' || *p == ' '; ++p) {
  }
  return p;
}

/*!
 * \brief Formats the first n elements of an array as a delimited string,
 *        using operator<< on a std::stringstream for each element.
 * \param arr Pointer to the elements; not read when n <= 0.
 * \param n Number of elements to format.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text, or an empty string when n <= 0.
 */
template<typename T>
inline static std::string ArrayToString(const T* arr, int n, char delimiter) {
  if (n <= 0) {
    return std::string("");
  }
  std::stringstream str_buf;
  // First element goes in without a leading delimiter.
  str_buf << arr[0];
  for (int i = 1; i < n; ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

237
238
239
240
/*!
 * \brief Formats all elements of a vector as a delimited string, using
 *        operator<< on a std::stringstream for each element.
 * \param arr The elements to format; taken by const reference so the
 *        vector is not copied.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text, or an empty string for an empty vector.
 */
template<typename T>
inline static std::string ArrayToString(const std::vector<T>& arr, char delimiter) {
  if (arr.empty()) {
    return std::string("");
  }
  std::stringstream str_buf;
  // First element goes in without a leading delimiter.
  str_buf << arr[0];
  for (size_t i = 1; i < arr.size(); ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

251
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
Guolin Ke's avatar
Guolin Ke committed
252
253
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
254
    Log::Fatal("StringToIntArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
255
256
257
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
258
    Atoi(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
259
260
261
  }
}

262
inline static void StringToFloatArray(const std::string& str, char delimiter, size_t n, float* out) {
Guolin Ke's avatar
Guolin Ke committed
263
264
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
265
    Log::Fatal("StringToFloatArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
266
267
268
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
269
    Atof(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
270
271
272
  }
}

273
inline static std::vector<float> StringToFloatArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
274
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
275
  std::vector<float> ret;
Guolin Ke's avatar
Guolin Ke committed
276
277
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
278
    float val = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
    Atof(strs[i].c_str(), &val);
    ret.push_back(val);
  }
  return ret;
}

/*!
 * \brief Splits a delimited string and parses each field as an int.
 * \param str The delimited text.
 * \param delimiter Field separator.
 * \return One int per field, in order.
 */
inline static std::vector<int> StringToIntArray(const std::string& str, char delimiter) {
  std::vector<std::string> fields = Split(str.c_str(), delimiter);
  std::vector<int> parsed;
  for (std::string& field : fields) {
    field = Trim(field);
    int number = 0;
    Atoi(field.c_str(), &number);
    parsed.push_back(number);
  }
  return parsed;
}

/*!
 * \brief Concatenates all strings, placing a delimiter between neighbours.
 * \param strs The pieces to join.
 * \param delimiter Character inserted between consecutive pieces.
 * \return The joined text, or an empty string when strs is empty.
 */
inline static std::string Join(const std::vector<std::string>& strs, char delimiter) {
  std::string joined;
  if (strs.empty()) {
    return joined;
  }
  joined += strs.front();
  for (size_t idx = 1; idx < strs.size(); ++idx) {
    joined += delimiter;
    joined += strs[idx];
  }
  return joined;
}

/*!
 * \brief Concatenates the strings in the half-open index range
 *        [start, end), placing a delimiter between neighbours.
 *        end is clamped to strs.size(); a range that is empty after
 *        clamping (including start > end, start past the last element, or
 *        an empty vector) yields an empty string.
 * \param strs The pieces to join.
 * \param start Index of the first piece to include.
 * \param end Index one past the last piece to include.
 * \param delimiter Character inserted between consecutive pieces.
 * \return The joined text.
 */
inline static std::string Join(const std::vector<std::string>& strs, size_t start, size_t end, char delimiter) {
  end = std::min<size_t>(end, strs.size());
  // Compare instead of subtracting: the indices are unsigned, so
  // `end - start` would wrap around when start > end.
  if (start >= end) {
    return std::string("");
  }
  std::stringstream ss;
  ss << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    ss << delimiter;
    ss << strs[i];
  }
  return ss.str();
}

/*!
 * \brief Rounds a value up to the nearest power of two.
 * \param x The value to round; anything <= 1 rounds to 1.
 * \return The smallest power of two that is >= x, or 0 when no such power
 *         fits in int64_t (x > 2^62).
 */
static inline int64_t Pow2RoundUp(int64_t x) {
  // Largest power of two representable in int64_t.
  const int64_t kMaxPow2 = static_cast<int64_t>(1) << 62;
  if (x > kMaxPow2) {
    return 0;
  }
  int64_t t = 1;
  // t < x <= 2^62 guarantees the shift never reaches the sign bit.
  while (t < x) {
    t <<= 1;
  }
  return t;
}

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
/*!
 * \brief Do inplace softmax transformation on p_rec.
 *        The maximum element is subtracted before exponentiation so that
 *        the largest exponent is exp(0). An empty vector is left untouched.
 * \param p_rec The input/output vector of the values.
 */
inline void Softmax(std::vector<float>* p_rec) {
  std::vector<float> &rec = *p_rec;
  if (rec.empty()) {
    // Nothing to normalise; also avoids reading rec[0] out of bounds.
    return;
  }
  float wmax = rec[0];
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
  float wsum = 0.0f;
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] /= wsum;
  }
}

356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
/*!
 * \brief Sorts keys[start..] and applies the same permutation to
 *        values[start..]. Elements before start are left untouched.
 *        std::sort is used, so the relative order of equal keys is not
 *        guaranteed.
 * \param keys Sort keys; reordered in place from index start onward.
 * \param values Payload paired with keys by index; it is read and written
 *        at every index in [start, keys.size()), so it needs at least
 *        keys.size() elements.
 * \param start Index of the first element taking part in the sort.
 * \param is_reverse Sort descending by key when true, ascending otherwise.
 */
template<typename T1, typename T2>
inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t start, bool is_reverse = false) {
  std::vector<std::pair<T1, T2>> arr;
  for (size_t i = start; i < keys.size(); ++i) {
    arr.emplace_back(keys[i], values[i]);
  }
  if (!is_reverse) {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first < b.first;
    });
  } else {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first > b.first;
    });
  }
  // arr is indexed from 0 while keys/values are indexed from start, so the
  // write-back must offset the destination by start.
  for (size_t i = 0; i < arr.size(); ++i) {
    keys[start + i] = arr[i].first;
    values[start + i] = arr[i].second;
  }
}

Guolin Ke's avatar
Guolin Ke committed
378
379
380
381
}  // namespace Common

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
382
#endif   // LIGHTGBM_UTILS_COMMON_FUN_H_