"R-package/vscode:/vscode.git/clone" did not exist on "5bb5f1b9b602b6f24b7cf18a4d5be6537da1121a"
common.h 9.33 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include <LightGBM/utils/log.h>

#include <cstdio>
#include <string>
#include <vector>
#include <sstream>
#include <cstdint>
11
#include <algorithm>
12
#include <cmath>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

namespace LightGBM {

namespace Common {

/*!
 * \brief Returns a copy of the larger of two values.
 *        When neither compares greater than the other, b is returned.
 */
template<typename T>
inline static T Max(const T& a, const T& b) {
  if (a > b) {
    return a;
  }
  return b;
}

/*!
 * \brief Returns a copy of the smaller of two values.
 *        When neither compares less than the other, b is returned.
 */
template<typename T>
inline static T Min(const T& a, const T& b) {
  if (a < b) {
    return a;
  }
  return b;
}



/*!
 * \brief Strips leading and trailing whitespace (space, \f, \n, \r, \t, \v) in place.
 * \param str The string to modify.
 * \return Reference to the same (now trimmed) string.
 */
inline static std::string& Trim(std::string& str) {
  const char* blanks = " \f\n\r\t\v";
  if (str.empty()) {
    return str;
  }
  // Drop the trailing run first; npos + 1 wraps to 0, which clears an all-blank string.
  const std::string::size_type last_kept = str.find_last_not_of(blanks);
  str.erase(last_kept + 1);
  // Then drop the leading run.
  const std::string::size_type first_kept = str.find_first_not_of(blanks);
  str.erase(0, first_kept);
  return str;
}

39
40
41
42
43
44
45
46
/*!
 * \brief Strips leading and trailing single/double quote characters in place.
 * \param str The string to modify.
 * \return Reference to the same (now unquoted) string.
 */
inline static std::string& RemoveQuotationSymbol(std::string& str) {
  const char* quotes = "'\"";
  if (str.empty()) {
    return str;
  }
  // Trailing quotes first; npos + 1 wraps to 0, which clears a quotes-only string.
  const std::string::size_type last_kept = str.find_last_not_of(quotes);
  str.erase(last_kept + 1);
  // Then leading quotes.
  const std::string::size_type first_kept = str.find_first_not_of(quotes);
  str.erase(0, first_kept);
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
47
48
49
50
51
52
53
/*!
 * \brief Checks whether str begins with prefix.
 * \param str The string to inspect.
 * \param prefix The expected leading characters; an empty prefix always matches.
 * \return true if the first prefix.size() characters of str equal prefix.
 */
inline static bool StartsWith(const std::string& str, const std::string& prefix) {
  // compare() clamps the length to str.size(), so a str shorter than prefix
  // simply compares unequal; no temporary substring is allocated.
  return str.compare(0, prefix.size(), prefix) == 0;
}
Guolin Ke's avatar
Guolin Ke committed
54
/*!
 * \brief Splits a C string on every occurrence of a single delimiter character.
 *        Empty fields are preserved, so the result always has
 *        (number of delimiters + 1) elements.
 * \param c_str Null-terminated input string.
 * \param delimiter The character to split on.
 * \return The fields in order of appearance.
 */
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = str.find(delimiter);
  while (pos != std::string::npos) {
    ret.push_back(str.substr(i, pos - i));
    // the next field starts right after the delimiter
    i = ++pos;
    pos = str.find(delimiter, pos);
  }
  // the remainder after the last delimiter (possibly empty)
  ret.push_back(str.substr(i));
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*!
 * \brief Splits a C string at every character that appears in delimiters.
 *        Empty fields are preserved.
 * \param c_str Null-terminated input string.
 * \param delimiters Null-terminated set of delimiter characters.
 * \return The fields in order of appearance.
 */
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  std::vector<std::string> pieces;
  const std::string text(c_str);
  std::string::size_type field_begin = 0;
  for (std::string::size_type hit = text.find_first_of(delimiters);
       hit != std::string::npos;
       hit = text.find_first_of(delimiters, field_begin)) {
    pieces.push_back(text.substr(field_begin, hit - field_begin));
    field_begin = hit + 1;
  }
  // whatever follows the last delimiter (possibly empty)
  pieces.push_back(text.substr(field_begin));
  return pieces;
}

Guolin Ke's avatar
Guolin Ke committed
83
84
85
86
87
88
89
90
91
/*!
 * \brief Parses a decimal integer from the front of a C string.
 *        Leading spaces, an optional '+'/'-' sign and a run of digits are consumed,
 *        followed by any trailing spaces. With no digits present the result is 0.
 * \param p Null-terminated input.
 * \param out Receives the parsed value.
 * \return Pointer to the first character that was not consumed.
 */
inline static const char* Atoi(const char* p, int* out) {
  while (*p == ' ') {
    ++p;
  }
  bool negative = false;
  if (*p == '-') {
    negative = true;
    ++p;
  } else if (*p == '+') {
    ++p;
  }
  // Accumulate as unsigned: wrap-around is well defined there, whereas signed
  // overflow (e.g. while reading the magnitude of INT_MIN) is undefined behaviour.
  unsigned int magnitude = 0;
  for (; *p >= '0' && *p <= '9'; ++p) {
    magnitude = magnitude * 10u + static_cast<unsigned int>(*p - '0');
  }
  *out = static_cast<int>(negative ? 0u - magnitude : magnitude);
  while (*p == ' ') {
    ++p;
  }
  return p;
}

105
inline static const char* Atof(const char* p, double* out) {
Guolin Ke's avatar
Guolin Ke committed
106
  int frac;
107
  double sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
108
  *out = 0;
Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }

  // Get sign, if any.
115
  sign = 1.0;
Guolin Ke's avatar
Guolin Ke committed
116
  if (*p == '-') {
117
    sign = -1.0;
Guolin Ke's avatar
Guolin Ke committed
118
    ++p;
119
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
120
121
122
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
123
124
125
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
126
127
    for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0 + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
128
    }
Guolin Ke's avatar
Guolin Ke committed
129

Guolin Ke's avatar
Guolin Ke committed
130
131
    // Get digits after decimal point, if any.
    if (*p == '.') {
132
      double pow10 = 10.0;
Guolin Ke's avatar
Guolin Ke committed
133
      ++p;
Guolin Ke's avatar
Guolin Ke committed
134
135
      while (*p >= '0' && *p <= '9') {
        value += (*p - '0') / pow10;
136
        pow10 *= 10.0;
Guolin Ke's avatar
Guolin Ke committed
137
138
        ++p;
      }
Guolin Ke's avatar
Guolin Ke committed
139
140
    }

Guolin Ke's avatar
Guolin Ke committed
141
142
    // Handle exponent, if any.
    frac = 0;
143
    scale = 1.0;
Guolin Ke's avatar
Guolin Ke committed
144
145
146
    if ((*p == 'e') || (*p == 'E')) {
      unsigned int expon;
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
147
      ++p;
Guolin Ke's avatar
Guolin Ke committed
148
149
150
151
152
153
154
155
156
157
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
158
159
160
      if (expon > 308) expon = 308;
      // Calculate scaling factor.
      while (expon >= 50) { scale *= 1E50; expon -= 50; }
Guolin Ke's avatar
Guolin Ke committed
161
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
162
      while (expon > 0) { scale *= 10.0; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
163
    }
Guolin Ke's avatar
Guolin Ke committed
164
165
166
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
167
    size_t cnt = 0;
168
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
169
170
      && *(p + cnt) != '\t' && *(p + cnt) != ','
      && *(p + cnt) != '\n' && *(p + cnt) != '\r'
171
      && *(p + cnt) != ':') {
172
173
      ++cnt;
    }
174
    if (cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
175
176
177
      std::string tmp_str(p, cnt);
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
      if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
178
        *out = 0;
179
      } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
180
        *out = sign * 1e308;
181
      } else {
Qiwei Ye's avatar
Qiwei Ye committed
182
        Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
183
184
      }
      p += cnt;
185
    }
Guolin Ke's avatar
Guolin Ke committed
186
  }
Guolin Ke's avatar
Guolin Ke committed
187

Guolin Ke's avatar
Guolin Ke committed
188
189
190
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
191

Guolin Ke's avatar
Guolin Ke committed
192
193
194
  return p;
}

195
196


197
198
199
200
201
202
203
204
/*!
 * \brief Parses an integer with Atoi and reports whether the whole string was consumed.
 * \param p Null-terminated input.
 * \param out Receives the parsed value.
 * \return true if Atoi stopped at the terminating '\0', false otherwise.
 */
inline bool AtoiAndCheck(const char* p, int* out) {
  const char* rest = Atoi(p, out);
  return *rest == '\0';
}

205
inline bool AtofAndCheck(const char* p, double* out) {
206
207
208
209
210
211
212
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

Guolin Ke's avatar
Guolin Ke committed
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*!
 * \brief Advances past any run of spaces and tabs.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is neither ' ' nor '\t'.
 */
inline static const char* SkipSpaceAndTab(const char* p) {
  for (; *p == ' ' || *p == '\t'; ++p) {
  }
  return p;
}

/*!
 * \brief Advances past any run of line feeds, carriage returns and spaces.
 * \param p Null-terminated input.
 * \return Pointer to the first character that is none of '\n', '\r', ' '.
 */
inline static const char* SkipReturn(const char* p) {
  for (; *p == '\n' || *p == '\r' || *p == ' '; ++p) {
  }
  return p;
}

/*!
 * \brief Formats the first n elements of an array as a delimiter-separated string.
 *        Elements are written with operator<< on a default-configured std::stringstream.
 * \param arr Pointer to the elements.
 * \param n Number of elements to write; n <= 0 yields an empty string.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text.
 */
template<typename T>
inline static std::string ArrayToString(const T* arr, int n, char delimiter) {
  if (n <= 0) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << arr[0];
  for (int i = 1; i < n; ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

241
242
243
244
/*!
 * \brief Formats all elements of a vector as a delimiter-separated string.
 *        Elements are written with operator<< on a default-configured std::stringstream.
 * \param arr The elements; taken by const reference so the vector is not copied.
 * \param delimiter Character placed between consecutive elements.
 * \return The joined text, or an empty string for an empty vector.
 */
template<typename T>
inline static std::string ArrayToString(const std::vector<T>& arr, char delimiter) {
  if (arr.empty()) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << arr[0];
  for (size_t i = 1; i < arr.size(); ++i) {
    str_buf << delimiter;
    str_buf << arr[i];
  }
  return str_buf.str();
}

255
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
Guolin Ke's avatar
Guolin Ke committed
256
257
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
258
    Log::Fatal("StringToIntArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
259
260
261
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
262
    Atoi(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
263
264
265
  }
}

266
267

inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, double* out) {
Guolin Ke's avatar
Guolin Ke committed
268
269
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
  if (strs.size() != n) {
270
    Log::Fatal("StringToDoubleArray error, size doesn't matched.");
Guolin Ke's avatar
Guolin Ke committed
271
272
273
  }
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
274
    Atof(strs[i].c_str(), &out[i]);
Guolin Ke's avatar
Guolin Ke committed
275
276
277
  }
}

278
inline static std::vector<double> StringToDoubleArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
279
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
280
  std::vector<double> ret;
Guolin Ke's avatar
Guolin Ke committed
281
282
  for (size_t i = 0; i < strs.size(); ++i) {
    strs[i] = Trim(strs[i]);
283
    double val = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
    Atof(strs[i].c_str(), &val);
    ret.push_back(val);
  }
  return ret;
}

/*!
 * \brief Splits str on delimiter and parses each trimmed field with Atoi.
 *        Characters following the number inside a field are not checked.
 * \param str The delimited text.
 * \param delimiter The field separator.
 * \return One int per field, in order.
 */
inline static std::vector<int> StringToIntArray(const std::string& str, char delimiter) {
  std::vector<std::string> fields = Split(str.c_str(), delimiter);
  std::vector<int> parsed;
  for (std::string& field : fields) {
    Trim(field);
    int number = 0;
    Atoi(field.c_str(), &number);
    parsed.push_back(number);
  }
  return parsed;
}

/*!
 * \brief Concatenates all strings, placing delimiter between consecutive elements.
 * \param strs The pieces to join.
 * \param delimiter Character inserted between pieces.
 * \return The joined text, or an empty string when strs is empty.
 */
inline static std::string Join(const std::vector<std::string>& strs, char delimiter) {
  if (strs.empty()) {
    return std::string("");
  }
  std::stringstream joined;
  std::vector<std::string>::const_iterator it = strs.begin();
  joined << *it;
  for (++it; it != strs.end(); ++it) {
    joined << delimiter << *it;
  }
  return joined.str();
}

/*!
 * \brief Concatenates strs[start, end), placing delimiter between consecutive elements.
 *        start is clamped to the last valid index and end to strs.size().
 * \param strs The pieces to join.
 * \param start Index of the first piece to include.
 * \param end One past the index of the last piece to include.
 * \param delimiter Character inserted between pieces.
 * \return The joined text; empty when strs is empty or the clamped range is empty.
 */
inline static std::string Join(const std::vector<std::string>& strs, size_t start, size_t end, char delimiter) {
  // An empty vector has no valid index; size() - 1 would wrap around.
  if (strs.empty()) {
    return std::string("");
  }
  start = std::min<size_t>(start, strs.size() - 1);
  end = std::min<size_t>(end, strs.size());
  // Both are unsigned, so compare directly instead of testing end - start <= 0,
  // which cannot detect end < start.
  if (start >= end) {
    return std::string("");
  }
  std::stringstream ss;
  ss << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    ss << delimiter;
    ss << strs[i];
  }
  return ss.str();
}

/*!
 * \brief Rounds x up to the nearest power of two.
 * \param x The value to round.
 * \return The smallest power of two that is >= x (1 for any x <= 1),
 *         or 0 when no such power fits in int64_t (x > 2^62).
 */
static inline int64_t Pow2RoundUp(int64_t x) {
  // 2^62 is the largest power of two representable in int64_t; shifting a
  // signed value beyond it overflows, so bail out before the loop could do so.
  const int64_t max_pow2 = static_cast<int64_t>(1) << 62;
  if (x > max_pow2) {
    return 0;
  }
  int64_t t = 1;
  while (t < x) {
    t <<= 1;
  }
  return t;
}

341
342
343
344
/*!
 * \brief Do inplace softmax transformation on p_rec.
 *        The maximum element is subtracted before exponentiation so that
 *        std::exp is never called with a positive argument.
 * \param p_rec The input/output vector of the values; an empty vector is left untouched.
 */
inline void Softmax(std::vector<double>* p_rec) {
  std::vector<double>& rec = *p_rec;
  // nothing to normalise, and rec[0] below would be out of bounds
  if (rec.empty()) {
    return;
  }
  double wmax = rec[0];
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
  double wsum = 0.0;
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] /= wsum;
  }
}

361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
/*!
 * \brief Sorts keys[start..] and applies the same permutation to values[start..].
 *        Elements before start are left untouched. The sort is not stable.
 * \param keys The sort keys; reordered in place.
 * \param values Payload paired element-wise with keys; must cover every index of keys from start on.
 * \param start Index of the first element that takes part in the sort.
 * \param is_reverse false for ascending key order, true for descending.
 */
template<typename T1, typename T2>
inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t start, bool is_reverse = false) {
  std::vector<std::pair<T1, T2>> arr;
  if (start < keys.size()) {
    arr.reserve(keys.size() - start);
  }
  for (size_t i = start; i < keys.size(); ++i) {
    arr.emplace_back(keys[i], values[i]);
  }
  if (!is_reverse) {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first < b.first;
    });
  } else {
    std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
      return a.first > b.first;
    });
  }
  // arr[0] corresponds to keys[start], so offset the destination index.
  for (size_t i = 0; i < arr.size(); ++i) {
    keys[start + i] = arr[i].first;
    values[start + i] = arr[i].second;
  }
}

Guolin Ke's avatar
Guolin Ke committed
383
384
385
386
}  // namespace Common

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
387
#endif   // LIGHTGBM_UTILS_COMMON_FUN_H_