".github/vscode:/vscode.git/clone" did not exist on "33836a8415df637929ab169669b3c81aac17aa4f"
common.h 9.3 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include <LightGBM/utils/log.h>

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

namespace LightGBM {

namespace Common {

/*!
 * \brief Picks the larger of two values using operator>.
 * \param a First candidate
 * \param b Second candidate
 * \return A copy of a when a > b holds, otherwise a copy of b
 */
template<typename T>
inline static T Max(const T& a, const T& b) {
  if (a > b) {
    return a;
  }
  return b;
}

/*!
 * \brief Picks the smaller of two values using operator<.
 * \param a First candidate
 * \param b Second candidate
 * \return A copy of a when a < b holds, otherwise a copy of b
 */
template<typename T>
inline static T Min(const T& a, const T& b) {
  if (a < b) {
    return a;
  }
  return b;
}



/*!
 * \brief Strips leading and trailing whitespace from a string, in place.
 *        The characters removed are space, \f, \n, \r, \t and \v.
 * \param str String to modify
 * \return Reference to the same (now trimmed) string
 */
inline static std::string& Trim(std::string& str) {
  if (!str.empty()) {
    const char* const blanks = " \f\n\r\t\v";
    // Drop the tail first; when nothing but blanks is present npos + 1 wraps
    // to 0 and the whole string is erased.
    const size_t last_kept = str.find_last_not_of(blanks);
    str.erase(last_kept + 1);
    const size_t first_kept = str.find_first_not_of(blanks);
    str.erase(0, first_kept);
  }
  return str;
}

39
40
41
42
43
44
45
46
/*!
 * \brief Strips leading and trailing single/double quote characters from a
 *        string, in place. Quotes in the interior are left untouched.
 * \param str String to modify
 * \return Reference to the same (now unquoted) string
 */
inline static std::string& RemoveQuotationSymbol(std::string& str) {
  if (!str.empty()) {
    const char* const quotes = "'\"";
    // When the string consists of quotes only, npos + 1 wraps to 0 and the
    // whole string is erased.
    const size_t last_kept = str.find_last_not_of(quotes);
    str.erase(last_kept + 1);
    const size_t first_kept = str.find_first_not_of(quotes);
    str.erase(0, first_kept);
  }
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
47
48
49
50
51
52
53
/*!
 * \brief Tests whether a string begins with the given prefix.
 *        An empty prefix matches every string.
 * \param str String to inspect
 * \param prefix Prefix to look for
 * \return true if the first prefix.size() characters of str equal prefix
 */
inline static bool StartsWith(const std::string& str, const std::string& prefix) {
  // compare() clamps the length to str.size(), so a prefix longer than str
  // simply compares unequal; no temporary substring is allocated.
  return str.compare(0, prefix.size(), prefix) == 0;
}
Guolin Ke's avatar
Guolin Ke committed
54
/*!
 * \brief Splits a C string on every occurrence of a single delimiter.
 *        Empty tokens are kept, so the result always has
 *        (number of delimiters + 1) entries.
 * \param c_str NUL-terminated input; a null pointer is treated as ""
 * \param delimiter Character to split on
 * \return Tokens in order of appearance
 */
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
  std::vector<std::string> ret;
  if (c_str == nullptr) {
    // Same result as splitting the empty string.
    ret.emplace_back();
    return ret;
  }
  // Scan the input in place instead of first copying it into a std::string.
  const char* token_begin = c_str;
  for (const char* cur = c_str; ; ++cur) {
    const bool at_end = (*cur == '\0');
    if (at_end || *cur == delimiter) {
      ret.emplace_back(token_begin, cur);
      if (at_end) {
        break;
      }
      token_begin = cur + 1;
    }
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*!
 * \brief Splits a C string whenever any character of a delimiter set is met.
 *        Empty tokens are kept.
 * \param c_str NUL-terminated input; a null pointer is treated as ""
 * \param delimiters NUL-terminated set of delimiter characters; a null
 *        pointer is treated as the empty set (no splitting)
 * \return Tokens in order of appearance
 */
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  std::vector<std::string> ret;
  // Constructing std::string / calling find_first_of with a null pointer is
  // undefined behaviour, so map null to the empty string.
  const std::string str(c_str != nullptr ? c_str : "");
  const char* const delims = (delimiters != nullptr) ? delimiters : "";
  size_t token_begin = 0;
  size_t pos = str.find_first_of(delims);
  while (pos != std::string::npos) {
    ret.push_back(str.substr(token_begin, pos - token_begin));
    token_begin = pos + 1;
    pos = str.find_first_of(delims, token_begin);
  }
  ret.push_back(str.substr(token_begin));
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
83
84
85
86
87
88
89
90
91
/*!
 * \brief Parses a decimal integer from the start of a C string.
 *        Leading spaces, an optional '+'/'-' sign and trailing spaces are
 *        consumed. When no digits are present the result is 0.
 *        Values outside the range of int saturate to the nearest limit
 *        instead of overflowing.
 * \param p NUL-terminated input
 * \param out Receives the parsed value
 * \return Pointer to the first character after the number and any spaces
 *         that follow it
 */
inline static const char* Atoi(const char* p, int* out) {
  static_assert(sizeof(int) < sizeof(int64_t),
                "Atoi accumulates in int64_t and needs it to be wider than int");
  while (*p == ' ') {
    ++p;
  }
  bool negative = false;
  if (*p == '-') {
    negative = true;
    ++p;
  } else if (*p == '+') {
    ++p;
  }
  // Largest magnitude that can be represented for the chosen sign.
  const int64_t max_int = std::numeric_limits<int>::max();
  const int64_t limit = negative ? max_int + 1 : max_int;
  int64_t magnitude = 0;
  for (; *p >= '0' && *p <= '9'; ++p) {
    // Once saturated, keep consuming digits without accumulating so the
    // 64-bit accumulator itself can never overflow.
    if (magnitude < limit) {
      magnitude = magnitude * 10 + (*p - '0');
      if (magnitude > limit) {
        magnitude = limit;
      }
    }
  }
  *out = static_cast<int>(negative ? -magnitude : magnitude);
  while (*p == ' ') {
    ++p;
  }
  return p;
}

//ref to http://www.leapsecond.com/tools/fast_atof.c
106
inline static const char* Atof(const char* p, float* out) {
Guolin Ke's avatar
Guolin Ke committed
107
  int frac;
108
  float sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
109
  *out = 0;
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }

  // Get sign, if any.
116
  sign = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
117
  if (*p == '-') {
118
    sign = -1.0f;
Guolin Ke's avatar
Guolin Ke committed
119
    ++p;
120
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
121
122
123
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
124
125
126
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
127
128
    for (value = 0.0f; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0f + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
129
    }
Guolin Ke's avatar
Guolin Ke committed
130

Guolin Ke's avatar
Guolin Ke committed
131
132
    // Get digits after decimal point, if any.
    if (*p == '.') {
133
      float pow10 = 10.0f;
Guolin Ke's avatar
Guolin Ke committed
134
      ++p;
Guolin Ke's avatar
Guolin Ke committed
135
136
      while (*p >= '0' && *p <= '9') {
        value += (*p - '0') / pow10;
137
        pow10 *= 10.0f;
Guolin Ke's avatar
Guolin Ke committed
138
139
        ++p;
      }
Guolin Ke's avatar
Guolin Ke committed
140
141
    }

Guolin Ke's avatar
Guolin Ke committed
142
143
    // Handle exponent, if any.
    frac = 0;
144
    scale = 1.0f;
Guolin Ke's avatar
Guolin Ke committed
145
146
147
    if ((*p == 'e') || (*p == 'E')) {
      unsigned int expon;
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
148
      ++p;
Guolin Ke's avatar
Guolin Ke committed
149
150
151
152
153
154
155
156
157
158
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
159
      if (expon > 38) expon = 38;
Guolin Ke's avatar
Guolin Ke committed
160
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
161
      while (expon > 0) { scale *= 10.0f; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
162
    }
Guolin Ke's avatar
Guolin Ke committed
163
164
165
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
166
    size_t cnt = 0;
167
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
168
169
      && *(p + cnt) != '\t' && *(p + cnt) != ','
      && *(p + cnt) != '\n' && *(p + cnt) != '\r'
170
      && *(p + cnt) != ':') {
171
172
      ++cnt;
    }
173
    if (cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
174
175
176
      std::string tmp_str(p, cnt);
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
      if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
177
        *out = 0.0f;
178
      } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
179
        *out = sign * static_cast<float>(1e38);
180
      } else {
Qiwei Ye's avatar
Qiwei Ye committed
181
        Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
182
183
      }
      p += cnt;
184
    }
Guolin Ke's avatar
Guolin Ke committed
185
  }
Guolin Ke's avatar
Guolin Ke committed
186

Guolin Ke's avatar
Guolin Ke committed
187
188
189
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
190

Guolin Ke's avatar
Guolin Ke committed
191
192
193
  return p;
}

194
195
196
197
198
199
200
201
/*!
 * \brief Parses an integer with Atoi and checks that the whole string was
 *        consumed.
 * \param p NUL-terminated input
 * \param out Receives the value parsed by Atoi
 * \return true if Atoi stopped at the terminating NUL, false otherwise
 */
inline bool AtoiAndCheck(const char* p, int* out) {
  const char* rest = Atoi(p, out);
  return *rest == '\0';
}

202
inline bool AtofAndCheck(const char* p, float* out) {
203
204
205
206
207
208
209
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

Guolin Ke's avatar
Guolin Ke committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
/*!
 * \brief Advances past any run of spaces and tabs.
 * \param p NUL-terminated input
 * \return Pointer to the first character that is neither ' ' nor '\t'
 */
inline static const char* SkipSpaceAndTab(const char* p) {
  for (;;) {
    const char c = *p;
    if (c != ' ' && c != '\t') {
      break;
    }
    ++p;
  }
  return p;
}

/*!
 * \brief Advances past any run of line-feed, carriage-return and space
 *        characters.
 * \param p NUL-terminated input
 * \return Pointer to the first character that is none of '\n', '\r', ' '
 */
inline static const char* SkipReturn(const char* p) {
  for (;;) {
    const char c = *p;
    if (c != '\n' && c != '\r' && c != ' ') {
      break;
    }
    ++p;
  }
  return p;
}

/*!
 * \brief Formats the first n elements of an array into one string, with a
 *        delimiter between consecutive elements. Elements are written with
 *        operator<< on a default-configured std::stringstream.
 * \param arr Pointer to the first element
 * \param n Number of elements to write; n <= 0 yields an empty string
 * \param delimiter Separator placed between elements
 * \return The joined string
 */
template<typename T>
inline static std::string ArrayToString(const T* arr, int n, char delimiter) {
  std::stringstream joined;
  for (int idx = 0; idx < n; ++idx) {
    if (idx != 0) {
      joined << delimiter;
    }
    joined << arr[idx];
  }
  return joined.str();
}

238
239
240
241
/*!
 * \brief Formats all elements of a vector into one string, with a delimiter
 *        between consecutive elements. Elements are written with operator<<
 *        on a default-configured std::stringstream.
 * \param arr Elements to write; taken by const reference so the vector is
 *        not copied on every call
 * \param delimiter Separator placed between elements
 * \return The joined string, empty when arr is empty
 */
template<typename T>
inline static std::string ArrayToString(const std::vector<T>& arr, char delimiter) {
  if (arr.empty()) {
    return std::string();
  }
  std::stringstream str_buf;
  str_buf << arr[0];
  for (size_t i = 1; i < arr.size(); ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

252
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
Guolin Ke's avatar
Guolin Ke committed
253
254
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
255
    Log::Fatal("StringToIntArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
256
257
258
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
259
    Atoi(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
260
261
262
  }
}

263
inline static void StringToFloatArray(const std::string& str, char delimiter, size_t n, float* out) {
Guolin Ke's avatar
Guolin Ke committed
264
265
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
266
    Log::Fatal("StringToFloatArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
267
268
269
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
270
    Atof(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
271
272
273
  }
}

274
inline static std::vector<float> StringToFloatArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
275
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
276
  std::vector<float> ret;
Guolin Ke's avatar
Guolin Ke committed
277
278
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
279
    float val = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
    Atof(strs[i].c_str(), &val);
    ret.push_back(val);
  }
  return ret;
}

/*!
 * \brief Splits a delimited string and parses each field as an int.
 *        Each field is trimmed and parsed with Atoi.
 * \param str Delimited input
 * \param delimiter Field separator
 * \return One parsed value per field, in order
 */
inline static std::vector<int> StringToIntArray(const std::string& str, char delimiter) {
  std::vector<std::string> tokens = Split(str.c_str(), delimiter);
  std::vector<int> parsed;
  for (std::string& token : tokens) {
    Trim(token);
    int number = 0;
    Atoi(token.c_str(), &number);
    parsed.push_back(number);
  }
  return parsed;
}

/*!
 * \brief Concatenates all strings of a vector, placing a delimiter between
 *        consecutive entries.
 * \param strs Strings to join
 * \param delimiter Separator placed between entries
 * \return The joined string, empty when strs is empty
 */
inline static std::string Join(const std::vector<std::string>& strs, char delimiter) {
  std::stringstream joined;
  bool is_first = true;
  for (const std::string& piece : strs) {
    if (!is_first) {
      joined << delimiter;
    }
    joined << piece;
    is_first = false;
  }
  return joined.str();
}

/*!
 * \brief Concatenates the strings in the half-open index range [start, end),
 *        placing a delimiter between consecutive entries.
 *        The range is clipped to the vector: end is clamped to strs.size(),
 *        and an empty or inverted range (start >= end after clamping,
 *        including any start at or past the end of the vector) yields an
 *        empty string instead of reading out of bounds.
 * \param strs Strings to join
 * \param start Index of the first entry to include
 * \param end Index one past the last entry to include
 * \param delimiter Separator placed between entries
 * \return The joined string
 */
inline static std::string Join(const std::vector<std::string>& strs, size_t start, size_t end, char delimiter) {
  end = std::min<size_t>(end, strs.size());
  // Compare instead of subtracting: the indices are unsigned, so
  // "end - start <= 0" can never detect end < start.
  if (start >= end) {
    return std::string();
  }
  std::stringstream ss;
  ss << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    ss << delimiter;
    ss << strs[i];
  }
  return ss.str();
}

/*!
 * \brief Rounds a value up to the next power of two.
 * \param x Value to round; anything <= 1 (including negatives) yields 1
 * \return The smallest power of two that is >= x, or 0 when no such power
 *         fits in int64_t (x > 2^62)
 */
static inline int64_t Pow2RoundUp(int64_t x) {
  // 2^62 is the largest power of two representable in int64_t; shifting it
  // again would overflow the signed type.
  const int64_t max_pow2 = static_cast<int64_t>(1) << 62;
  int64_t t = 1;
  while (t < x) {
    if (t == max_pow2) {
      return 0;
    }
    t <<= 1;
  }
  return t;
}

337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/*!
 * \brief Do inplace softmax transformation on p_rec.
 *        The maximum element is subtracted before exponentiation so that the
 *        largest exponent is exp(0). A null pointer or an empty vector is
 *        left untouched.
 * \param p_rec The input/output vector of the values.
 */
inline void Softmax(std::vector<float>* p_rec) {
  if (p_rec == nullptr || p_rec->empty()) {
    return;
  }
  std::vector<float> &rec = *p_rec;
  float wmax = rec[0];
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
  float wsum = 0.0f;
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] /= wsum;
  }
}

357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
/*!
 * \brief Sorts keys[start..] and applies the same permutation to
 *        values[start..], leaving the elements before start untouched.
 *        The sort is by key only and is not stable.
 * \param keys Sort keys; reordered in place
 * \param values Payload paired with keys by index; reordered in place.
 *        Must hold at least keys.size() elements.
 * \param start Index of the first element taking part in the sort
 * \param is_reverse false for ascending order, true for descending order
 */
template<typename T1, typename T2>
inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t start, bool is_reverse = false) {
  std::vector<std::pair<T1, T2>> arr;
  if (start < keys.size()) {
    arr.reserve(keys.size() - start);
  }
  for (size_t i = start; i < keys.size(); ++i) {
    arr.emplace_back(keys[i], values[i]);
  }
  if (!is_reverse) {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first < b.first;
    });
  } else {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first > b.first;
    });
  }
  // arr[0] corresponds to keys[start], so the write-back must offset the
  // destination index by start.
  for (size_t i = 0; i < arr.size(); ++i) {
    keys[start + i] = arr[i].first;
    values[start + i] = arr[i].second;
  }
}

Guolin Ke's avatar
Guolin Ke committed
379
380
381
382
}  // namespace Common

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
383
#endif   // LIGHTGBM_UTILS_COMMON_FUN_H_