common.h 27.4 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

8
9
10
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>

11
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
12
#include <string>
13
#include <algorithm>
14
#include <chrono>
15
#include <cmath>
16
17
#include <cstdint>
#include <cstdio>
Guolin Ke's avatar
Guolin Ke committed
18
#include <functional>
19
#include <iomanip>
20
#include <iterator>
21
#include <map>
22
23
#include <memory>
#include <sstream>
Guolin Ke's avatar
Guolin Ke committed
24
#include <type_traits>
25
#include <unordered_map>
26
27
#include <utility>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
28

29
#if defined(_MSC_VER)
30
31
32
33
34
35
36
37
38
39
40
#include <malloc.h>
#elif MM_MALLOC
#include <mm_malloc.h>
#elif defined(__GNUC__)
#include <malloc.h>
#define _mm_malloc(a, b) memalign(b, a)
#define _mm_free(a) free(a)
#else
#include <stdlib.h>
#define _mm_malloc(a, b) malloc(a)
#define _mm_free(a) free(a)
41
42
#endif

Guolin Ke's avatar
Guolin Ke committed
43
44
45
46
namespace LightGBM {

namespace Common {

47
inline static char tolower(char in) {
Guolin Ke's avatar
Guolin Ke committed
48
49
50
51
52
  if (in <= 'Z' && in >= 'A')
    return in - ('Z' - 'z');
  return in;
}

53
inline static std::string Trim(std::string str) {
Guolin Ke's avatar
Guolin Ke committed
54
  if (str.empty()) {
Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
60
61
    return str;
  }
  str.erase(str.find_last_not_of(" \f\n\r\t\v") + 1);
  str.erase(0, str.find_first_not_of(" \f\n\r\t\v"));
  return str;
}

62
inline static std::string RemoveQuotationSymbol(std::string str) {
Guolin Ke's avatar
Guolin Ke committed
63
  if (str.empty()) {
64
65
66
67
68
69
    return str;
  }
  str.erase(str.find_last_not_of("'\"") + 1);
  str.erase(0, str.find_first_not_of("'\""));
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
70

Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
76
77
inline static bool StartsWith(const std::string& str, const std::string prefix) {
  if (str.substr(0, prefix.size()) == prefix) {
    return true;
  } else {
    return false;
  }
}
Guolin Ke's avatar
Guolin Ke committed
78

Guolin Ke's avatar
Guolin Ke committed
79
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
80
  std::vector<std::string> ret;
Guolin Ke's avatar
Guolin Ke committed
81
82
  std::string str(c_str);
  size_t i = 0;
Guolin Ke's avatar
Guolin Ke committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  size_t pos = 0;
  while (pos < str.length()) {
    if (str[pos] == delimiter) {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      ++pos;
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
  }
  return ret;
}

inline static std::vector<std::string> SplitLines(const char* c_str) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = 0;
  while (pos < str.length()) {
    if (str[pos] == '\n' || str[pos] == '\r') {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      // skip the line endings
      while (str[pos] == '\n' || str[pos] == '\r') ++pos;
      // new begin
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
Guolin Ke's avatar
Guolin Ke committed
121
122
123
124
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
125
126
127
128
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
Guolin Ke's avatar
Guolin Ke committed
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  size_t pos = 0;
  while (pos < str.length()) {
    bool met_delimiters = false;
    for (int j = 0; delimiters[j] != '\0'; ++j) {
      if (str[pos] == delimiters[j]) {
        met_delimiters = true;
        break;
      }
    }
    if (met_delimiters) {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      ++pos;
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
Guolin Ke's avatar
Guolin Ke committed
150
151
152
153
  }
  return ret;
}

154
155
156
157
template<typename T>
inline static const char* Atoi(const char* p, T* out) {
  int sign;
  T value;
Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
163
164
  while (*p == ' ') {
    ++p;
  }
  sign = 1;
  if (*p == '-') {
    sign = -1;
    ++p;
165
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
166
167
168
169
170
    ++p;
  }
  for (value = 0; *p >= '0' && *p <= '9'; ++p) {
    value = value * 10 + (*p - '0');
  }
171
  *out = static_cast<T>(sign * value);
Guolin Ke's avatar
Guolin Ke committed
172
173
174
175
176
177
  while (*p == ' ') {
    ++p;
  }
  return p;
}

178
template<typename T>
179
180
181
182
183
184
185
186
187
188
189
190
191
192
inline static double Pow(T base, int power) {
  if (power < 0) {
    return 1.0 / Pow(base, -power);
  } else if (power == 0) {
    return 1;
  } else if (power % 2 == 0) {
    return Pow(base*base, power / 2);
  } else if (power % 3 == 0) {
    return Pow(base*base*base, power / 3);
  } else {
    return base * Pow(base, power - 1);
  }
}

193
inline static const char* Atof(const char* p, double* out) {
Guolin Ke's avatar
Guolin Ke committed
194
  int frac;
195
  double sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
196
  *out = NAN;
Guolin Ke's avatar
Guolin Ke committed
197
198
199
200
201
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }
  // Get sign, if any.
202
  sign = 1.0;
Guolin Ke's avatar
Guolin Ke committed
203
  if (*p == '-') {
204
    sign = -1.0;
Guolin Ke's avatar
Guolin Ke committed
205
    ++p;
206
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
207
208
209
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
210
211
212
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
213
214
    for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0 + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
215
    }
Guolin Ke's avatar
Guolin Ke committed
216

Guolin Ke's avatar
Guolin Ke committed
217
218
    // Get digits after decimal point, if any.
    if (*p == '.') {
219
220
      double right = 0.0;
      int nn = 0;
Guolin Ke's avatar
Guolin Ke committed
221
      ++p;
Guolin Ke's avatar
Guolin Ke committed
222
      while (*p >= '0' && *p <= '9') {
223
224
        right = (*p - '0') + right * 10.0;
        ++nn;
Guolin Ke's avatar
Guolin Ke committed
225
226
        ++p;
      }
227
      value += right / Pow(10.0, nn);
Guolin Ke's avatar
Guolin Ke committed
228
229
    }

Guolin Ke's avatar
Guolin Ke committed
230
231
    // Handle exponent, if any.
    frac = 0;
232
    scale = 1.0;
Guolin Ke's avatar
Guolin Ke committed
233
    if ((*p == 'e') || (*p == 'E')) {
Guolin Ke's avatar
Guolin Ke committed
234
      uint32_t expon;
Guolin Ke's avatar
Guolin Ke committed
235
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
236
      ++p;
Guolin Ke's avatar
Guolin Ke committed
237
238
239
240
241
242
243
244
245
246
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
247
248
249
      if (expon > 308) expon = 308;
      // Calculate scaling factor.
      while (expon >= 50) { scale *= 1E50; expon -= 50; }
Guolin Ke's avatar
Guolin Ke committed
250
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
251
      while (expon > 0) { scale *= 10.0; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
252
    }
Guolin Ke's avatar
Guolin Ke committed
253
254
255
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
256
    size_t cnt = 0;
257
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
258
259
260
           && *(p + cnt) != '\t' && *(p + cnt) != ','
           && *(p + cnt) != '\n' && *(p + cnt) != '\r'
           && *(p + cnt) != ':') {
261
262
      ++cnt;
    }
263
    if (cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
264
      std::string tmp_str(p, cnt);
Guolin Ke's avatar
Guolin Ke committed
265
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), Common::tolower);
zhangjin's avatar
zhangjin committed
266
267
      if (tmp_str == std::string("na") || tmp_str == std::string("nan") ||
          tmp_str == std::string("null")) {
Guolin Ke's avatar
Guolin Ke committed
268
        *out = NAN;
269
      } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
270
        *out = sign * 1e308;
271
      } else {
272
        Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
273
274
      }
      p += cnt;
275
    }
Guolin Ke's avatar
Guolin Ke committed
276
  }
Guolin Ke's avatar
Guolin Ke committed
277

Guolin Ke's avatar
Guolin Ke committed
278
279
280
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
281

Guolin Ke's avatar
Guolin Ke committed
282
283
284
  return p;
}

285
inline static bool AtoiAndCheck(const char* p, int* out) {
286
287
288
289
290
291
292
  const char* after = Atoi(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

293
inline static bool AtofAndCheck(const char* p, double* out) {
294
295
296
297
298
299
300
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
inline static unsigned CountDecimalDigit32(uint32_t n) {
#if defined(_MSC_VER) || defined(__GNUC__)
  static const uint32_t powers_of_10[] = {
    0,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000
  };
#ifdef _MSC_VER
Guolin Ke's avatar
Guolin Ke committed
316
  // NOLINTNEXTLINE
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
  unsigned long i = 0;
  _BitScanReverse(&i, n | 1);
  uint32_t t = (i + 1) * 1233 >> 12;
#elif __GNUC__
  uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12;
#endif
  return t - (n < powers_of_10[t]) + 1;
#else
  if (n < 10) return 1;
  if (n < 100) return 2;
  if (n < 1000) return 3;
  if (n < 10000) return 4;
  if (n < 100000) return 5;
  if (n < 1000000) return 6;
  if (n < 10000000) return 7;
  if (n < 100000000) return 8;
  if (n < 1000000000) return 9;
  return 10;
#endif
}

inline static void Uint32ToStr(uint32_t value, char* buffer) {
  const char kDigitsLut[200] = {
340
341
342
343
344
345
346
347
348
349
    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
    '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
    '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
    '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
    '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
    '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
    '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
    '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
    '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
    '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
350
351
352
353
354
355
356
357
358
359
360
361
362
  };
  unsigned digit = CountDecimalDigit32(value);
  buffer += digit;
  *buffer = '\0';

  while (value >= 100) {
    const unsigned i = (value % 100) << 1;
    value /= 100;
    *--buffer = kDigitsLut[i + 1];
    *--buffer = kDigitsLut[i];
  }

  if (value < 10) {
363
    *--buffer = static_cast<char>(value) + '0';
364
  } else {
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    const unsigned i = value << 1;
    *--buffer = kDigitsLut[i + 1];
    *--buffer = kDigitsLut[i];
  }
}

inline static void Int32ToStr(int32_t value, char* buffer) {
  uint32_t u = static_cast<uint32_t>(value);
  if (value < 0) {
    *buffer++ = '-';
    u = ~u + 1;
  }
  Uint32ToStr(u, buffer);
}

380
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
381
  #ifdef _MSC_VER
382
  int num_chars = sprintf_s(buffer, buffer_len, "%.17g", value);
383
  #else
384
  int num_chars = snprintf(buffer, buffer_len, "%.17g", value);
385
  #endif
386
  CHECK_GE(num_chars, 0);
387
388
}

Guolin Ke's avatar
Guolin Ke committed
389
390
391
392
393
394
395
396
397
398
399
400
401
402
inline static const char* SkipSpaceAndTab(const char* p) {
  while (*p == ' ' || *p == '\t') {
    ++p;
  }
  return p;
}

inline static const char* SkipReturn(const char* p) {
  while (*p == '\n' || *p == '\r' || *p == ' ') {
    ++p;
  }
  return p;
}

Guolin Ke's avatar
Guolin Ke committed
403
404
template<typename T, typename T2>
inline static std::vector<T2> ArrayCast(const std::vector<T>& arr) {
405
  std::vector<T2> ret(arr.size());
Guolin Ke's avatar
Guolin Ke committed
406
  for (size_t i = 0; i < arr.size(); ++i) {
407
    ret[i] = static_cast<T2>(arr[i]);
Guolin Ke's avatar
Guolin Ke committed
408
  }
Guolin Ke's avatar
Guolin Ke committed
409
  return ret;
Guolin Ke's avatar
Guolin Ke committed
410
411
}

412
413
template<typename T, bool is_float, bool is_unsign>
struct __TToStringHelperFast {
414
  void operator()(T value, char* buffer, size_t) const {
415
416
417
418
419
420
    Int32ToStr(value, buffer);
  }
};

template<typename T>
struct __TToStringHelperFast<T, true, false> {
421
  void operator()(T value, char* buffer, size_t buf_len)
422
  const {
423
    #ifdef _MSC_VER
424
    int num_chars = sprintf_s(buffer, buf_len, "%g", value);
425
    #else
426
    int num_chars = snprintf(buffer, buf_len, "%g", value);
427
    #endif
428
    CHECK_GE(num_chars, 0);
429
430
431
432
433
  }
};

template<typename T>
struct __TToStringHelperFast<T, false, true> {
434
  void operator()(T value, char* buffer, size_t) const {
435
436
437
438
    Uint32ToStr(value, buffer);
  }
};

439
template<typename T>
440
441
inline static std::string ArrayToStringFast(const std::vector<T>& arr, size_t n) {
  if (arr.empty() || n == 0) {
442
    return std::string("");
Guolin Ke's avatar
Guolin Ke committed
443
  }
444
445
446
  __TToStringHelperFast<T, std::is_floating_point<T>::value, std::is_unsigned<T>::value> helper;
  const size_t buf_len = 16;
  std::vector<char> buffer(buf_len);
447
  std::stringstream str_buf;
448
449
450
451
452
  helper(arr[0], buffer.data(), buf_len);
  str_buf << buffer.data();
  for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
    helper(arr[i], buffer.data(), buf_len);
    str_buf << ' ' << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
453
  }
454
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
455
456
}

457
inline static std::string ArrayToString(const std::vector<double>& arr, size_t n) {
Guolin Ke's avatar
Guolin Ke committed
458
459
460
  if (arr.empty() || n == 0) {
    return std::string("");
  }
461
462
  const size_t buf_len = 32;
  std::vector<char> buffer(buf_len);
Guolin Ke's avatar
Guolin Ke committed
463
  std::stringstream str_buf;
464
465
  DoubleToStr(arr[0], buffer.data(), buf_len);
  str_buf << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
466
  for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
467
468
    DoubleToStr(arr[i], buffer.data(), buf_len);
    str_buf << ' ' << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
469
470
471
472
  }
  return str_buf.str();
}

473
474
475
template<typename T, bool is_float>
struct __StringToTHelper {
  T operator()(const std::string& str) const {
476
477
478
    T ret = 0;
    Atoi(str.c_str(), &ret);
    return ret;
479
480
481
482
483
484
485
486
487
488
  }
};

template<typename T>
struct __StringToTHelper<T, true> {
  T operator()(const std::string& str) const {
    return static_cast<T>(std::stod(str));
  }
};

Guolin Ke's avatar
Guolin Ke committed
489
template<typename T>
490
inline static std::vector<T> StringToArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
491
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
492
493
  std::vector<T> ret;
  ret.reserve(strs.size());
494
  __StringToTHelper<T, std::is_floating_point<T>::value> helper;
495
496
  for (const auto& s : strs) {
    ret.push_back(helper(s));
Guolin Ke's avatar
Guolin Ke committed
497
498
499
500
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
501
template<typename T>
502
503
504
505
506
inline static std::vector<T> StringToArray(const std::string& str, int n) {
  if (n == 0) {
    return std::vector<T>();
  }
  std::vector<std::string> strs = Split(str.c_str(), ' ');
Nikita Titov's avatar
Nikita Titov committed
507
  CHECK_EQ(strs.size(), static_cast<size_t>(n));
Guolin Ke's avatar
Guolin Ke committed
508
  std::vector<T> ret;
509
510
511
512
  ret.reserve(strs.size());
  __StringToTHelper<T, std::is_floating_point<T>::value> helper;
  for (const auto& s : strs) {
    ret.push_back(helper(s));
Guolin Ke's avatar
Guolin Ke committed
513
514
515
516
  }
  return ret;
}

517
518
519
520
521
522
523
524
525
526
527
528
template<typename T, bool is_float>
struct __StringToTHelperFast {
  const char* operator()(const char*p, T* out) const {
    return Atoi(p, out);
  }
};

template<typename T>
struct __StringToTHelperFast<T, true> {
  const char* operator()(const char*p, T* out) const {
    double tmp = 0.0f;
    auto ret = Atof(p, &tmp);
529
    *out = static_cast<T>(tmp);
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
    return ret;
  }
};

template<typename T>
inline static std::vector<T> StringToArrayFast(const std::string& str, int n) {
  if (n == 0) {
    return std::vector<T>();
  }
  auto p_str = str.c_str();
  __StringToTHelperFast<T, std::is_floating_point<T>::value> helper;
  std::vector<T> ret(n);
  for (int i = 0; i < n; ++i) {
    p_str = helper(p_str, &ret[i]);
  }
  return ret;
}

548
template<typename T>
Guolin Ke's avatar
Guolin Ke committed
549
inline static std::string Join(const std::vector<T>& strs, const char* delimiter) {
Guolin Ke's avatar
Guolin Ke committed
550
  if (strs.empty()) {
Guolin Ke's avatar
Guolin Ke committed
551
552
    return std::string("");
  }
553
  std::stringstream str_buf;
554
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
555
  str_buf << strs[0];
Guolin Ke's avatar
Guolin Ke committed
556
  for (size_t i = 1; i < strs.size(); ++i) {
557
558
    str_buf << delimiter;
    str_buf << strs[i];
Guolin Ke's avatar
Guolin Ke committed
559
  }
560
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
561
562
}

563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
template<>
inline std::string Join<int8_t>(const std::vector<int8_t>& strs, const char* delimiter) {
  if (strs.empty()) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  str_buf << static_cast<int16_t>(strs[0]);
  for (size_t i = 1; i < strs.size(); ++i) {
    str_buf << delimiter;
    str_buf << static_cast<int16_t>(strs[i]);
  }
  return str_buf.str();
}

578
template<typename T>
Guolin Ke's avatar
Guolin Ke committed
579
inline static std::string Join(const std::vector<T>& strs, size_t start, size_t end, const char* delimiter) {
Guolin Ke's avatar
Guolin Ke committed
580
581
582
  if (end - start <= 0) {
    return std::string("");
  }
Guolin Ke's avatar
Guolin Ke committed
583
584
  start = std::min(start, static_cast<size_t>(strs.size()) - 1);
  end = std::min(end, static_cast<size_t>(strs.size()));
585
  std::stringstream str_buf;
586
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
587
  str_buf << strs[start];
Guolin Ke's avatar
Guolin Ke committed
588
  for (size_t i = start + 1; i < end; ++i) {
589
590
    str_buf << delimiter;
    str_buf << strs[i];
Guolin Ke's avatar
Guolin Ke committed
591
  }
592
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
593
594
}

595
inline static int64_t Pow2RoundUp(int64_t x) {
Guolin Ke's avatar
Guolin Ke committed
596
597
598
599
600
601
602
603
604
605
  int64_t t = 1;
  for (int i = 0; i < 64; ++i) {
    if (t >= x) {
      return t;
    }
    t <<= 1;
  }
  return 0;
}

606
/*!
607
 * \brief Do inplace softmax transformation on p_rec
608
609
 * \param p_rec The input/output vector of the values.
 */
610
inline static void Softmax(std::vector<double>* p_rec) {
611
612
  std::vector<double> &rec = *p_rec;
  double wmax = rec[0];
613
614
615
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
616
  double wsum = 0.0f;
617
618
619
620
621
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
622
    rec[i] /= static_cast<double>(wsum);
623
624
625
  }
}

626
inline static void Softmax(const double* input, double* output, int len) {
Guolin Ke's avatar
Guolin Ke committed
627
  double wmax = input[0];
628
  for (int i = 1; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
629
    wmax = std::max(input[i], wmax);
630
631
632
  }
  double wsum = 0.0f;
  for (int i = 0; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
633
634
    output[i] = std::exp(input[i] - wmax);
    wsum += output[i];
635
636
  }
  for (int i = 0; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
637
    output[i] /= static_cast<double>(wsum);
638
639
640
  }
}

Guolin Ke's avatar
Guolin Ke committed
641
642
643
template<typename T>
std::vector<const T*> ConstPtrInVectorWrapper(const std::vector<std::unique_ptr<T>>& input) {
  std::vector<const T*> ret;
Guolin Ke's avatar
Guolin Ke committed
644
645
  for (auto t = input.begin(); t !=input.end(); ++t) {
    ret.push_back(t->get());
646
  }
Guolin Ke's avatar
Guolin Ke committed
647
  return ret;
648
649
}

Guolin Ke's avatar
Guolin Ke committed
650
template<typename T1, typename T2>
Guolin Ke's avatar
Guolin Ke committed
651
inline static void SortForPair(std::vector<T1>* keys, std::vector<T2>* values, size_t start, bool is_reverse = false) {
Guolin Ke's avatar
Guolin Ke committed
652
  std::vector<std::pair<T1, T2>> arr;
Guolin Ke's avatar
Guolin Ke committed
653
654
  auto& ref_key = *keys;
  auto& ref_value = *values;
Guolin Ke's avatar
Guolin Ke committed
655
  for (size_t i = start; i < keys->size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
656
    arr.emplace_back(ref_key[i], ref_value[i]);
Guolin Ke's avatar
Guolin Ke committed
657
658
  }
  if (!is_reverse) {
659
    std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
Guolin Ke's avatar
Guolin Ke committed
660
661
662
      return a.first < b.first;
    });
  } else {
663
    std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
Guolin Ke's avatar
Guolin Ke committed
664
665
666
667
      return a.first > b.first;
    });
  }
  for (size_t i = start; i < arr.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
668
669
    ref_key[i] = arr[i].first;
    ref_value[i] = arr[i].second;
Guolin Ke's avatar
Guolin Ke committed
670
671
672
  }
}

673
template <typename T>
Guolin Ke's avatar
Guolin Ke committed
674
675
inline static std::vector<T*> Vector2Ptr(std::vector<std::vector<T>>* data) {
  std::vector<T*> ptr(data->size());
Guolin Ke's avatar
Guolin Ke committed
676
  auto& ref_data = *data;
Guolin Ke's avatar
Guolin Ke committed
677
  for (size_t i = 0; i < data->size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
678
    ptr[i] = ref_data[i].data();
679
680
681
682
683
684
685
686
687
688
689
690
691
  }
  return ptr;
}

template <typename T>
inline static std::vector<int> VectorSize(const std::vector<std::vector<T>>& data) {
  std::vector<int> ret(data.size());
  for (size_t i = 0; i < data.size(); ++i) {
    ret[i] = static_cast<int>(data[i].size());
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
692
inline static double AvoidInf(double x) {
Guolin Ke's avatar
Guolin Ke committed
693
694
695
  if (std::isnan(x)) {
    return 0.0;
  } else if (x >= 1e300) {
Guolin Ke's avatar
Guolin Ke committed
696
    return 1e300;
697
  } else if (x <= -1e300) {
Guolin Ke's avatar
Guolin Ke committed
698
    return -1e300;
Guolin Ke's avatar
Guolin Ke committed
699
700
701
702
703
  } else {
    return x;
  }
}

704
inline static float AvoidInf(float x) {
Guolin Ke's avatar
Guolin Ke committed
705
  if (std::isnan(x)) {
Guolin Ke's avatar
Guolin Ke committed
706
707
    return 0.0f;
  } else if (x >= 1e38) {
708
709
710
711
712
713
    return 1e38f;
  } else if (x <= -1e38) {
    return -1e38f;
  } else {
    return x;
  }
714
715
716
}

template<typename _Iter> inline
717
718
719
720
static typename std::iterator_traits<_Iter>::value_type* IteratorValType(_Iter) {
  return (0);
}

721
template<typename _RanIt, typename _Pr, typename _VTRanIt> inline
722
723
724
static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
  size_t len = _Last - _First;
  const size_t kMinInnerLen = 1024;
725
  int num_threads = OMP_NUM_THREADS();
726
727
728
729
730
731
732
  if (len <= kMinInnerLen || num_threads <= 1) {
    std::sort(_First, _Last, _Pred);
    return;
  }
  size_t inner_size = (len + num_threads - 1) / num_threads;
  inner_size = std::max(inner_size, kMinInnerLen);
  num_threads = static_cast<int>((len + inner_size - 1) / inner_size);
733
#pragma omp parallel for schedule(static, 1)
734
735
736
737
738
739
740
741
742
743
744
  for (int i = 0; i < num_threads; ++i) {
    size_t left = inner_size*i;
    size_t right = left + inner_size;
    right = std::min(right, len);
    if (right > left) {
      std::sort(_First + left, _First + right, _Pred);
    }
  }
  // Buffer for merge.
  std::vector<_VTRanIt> temp_buf(len);
  _RanIt buf = temp_buf.begin();
745
  size_t s = inner_size;
746
747
748
  // Recursive merge
  while (s < len) {
    int loop_size = static_cast<int>((len + s * 2 - 1) / (s * 2));
749
    #pragma omp parallel for schedule(static, 1)
750
751
752
753
754
    for (int i = 0; i < loop_size; ++i) {
      size_t left = i * 2 * s;
      size_t mid = left + s;
      size_t right = mid + s;
      right = std::min(len, right);
Guolin Ke's avatar
Guolin Ke committed
755
      if (mid >= right) { continue; }
756
757
758
759
760
761
762
      std::copy(_First + left, _First + mid, buf + left);
      std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred);
    }
    s *= 2;
  }
}

763
template<typename _RanIt, typename _Pr> inline
764
765
766
767
static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred) {
  return ParallelSort(_First, _Last, _Pred, IteratorValType(_First));
}

768
// Check that all y[] are in interval [ymin, ymax] (end points included); throws error if not
769
template <typename T>
770
inline static void CheckElementsIntervalClosed(const T *y, T ymin, T ymax, int ny, const char *callername) {
771
  auto fatal_msg = [&y, &ymin, &ymax, &callername](int i) {
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
    std::ostringstream os;
    os << "[%s]: does not tolerate element [#%i = " << y[i] << "] outside [" << ymin << ", " << ymax << "]";
    Log::Fatal(os.str().c_str(), callername, i);
  };
  for (int i = 1; i < ny; i += 2) {
    if (y[i - 1] < y[i]) {
      if (y[i - 1] < ymin) {
        fatal_msg(i - 1);
      } else if (y[i] > ymax) {
        fatal_msg(i);
      }
    } else {
      if (y[i - 1] > ymax) {
        fatal_msg(i - 1);
      } else if (y[i] < ymin) {
        fatal_msg(i);
      }
    }
  }
791
  if (ny & 1) {  // odd
792
793
    if (y[ny - 1] < ymin || y[ny - 1] > ymax) {
      fatal_msg(ny - 1);
794
795
796
797
798
799
    }
  }
}

// One-pass scan over array w with nw elements: find min, max and sum of elements;
// this is useful for checking weight requirements.
800
template <typename T1, typename T2>
801
inline static void ObtainMinMaxSum(const T1 *w, int nw, T1 *mi, T1 *ma, T2 *su) {
802
803
804
805
  T1 minw;
  T1 maxw;
  T1 sumw;
  int i;
806
  if (nw & 1) {  // odd
807
808
809
810
    minw = w[0];
    maxw = w[0];
    sumw = w[0];
    i = 2;
811
  } else {  // even
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
    if (w[0] < w[1]) {
      minw = w[0];
      maxw = w[1];
    } else {
      minw = w[1];
      maxw = w[0];
    }
    sumw = w[0] + w[1];
    i = 3;
  }
  for (; i < nw; i += 2) {
    if (w[i - 1] < w[i]) {
      minw = std::min(minw, w[i - 1]);
      maxw = std::max(maxw, w[i]);
    } else {
      minw = std::min(minw, w[i]);
      maxw = std::max(maxw, w[i - 1]);
    }
    sumw += w[i - 1] + w[i];
  }
  if (mi != nullptr) {
    *mi = minw;
  }
  if (ma != nullptr) {
    *ma = maxw;
  }
  if (su != nullptr) {
    *su = static_cast<T2>(sumw);
  }
841
842
}

843
inline static std::vector<uint32_t> EmptyBitset(int n) {
844
  int size = n / 32;
845
  if (n % 32 != 0) ++size;
846
847
848
849
  return std::vector<uint32_t>(size);
}

template<typename T>
Guolin Ke's avatar
Guolin Ke committed
850
inline static void InsertBitset(std::vector<uint32_t>* vec, const T val) {
Guolin Ke's avatar
Guolin Ke committed
851
852
853
854
855
856
857
  auto& ref_v = *vec;
  int i1 = val / 32;
  int i2 = val % 32;
  if (static_cast<int>(vec->size()) < i1 + 1) {
    vec->resize(i1 + 1, 0);
  }
  ref_v[i1] |= (1 << i2);
858
859
}

860
861
template<typename T>
inline static std::vector<uint32_t> ConstructBitset(const T* vals, int n) {
862
863
864
865
866
867
868
869
870
871
872
873
  std::vector<uint32_t> ret;
  for (int i = 0; i < n; ++i) {
    int i1 = vals[i] / 32;
    int i2 = vals[i] % 32;
    if (static_cast<int>(ret.size()) < i1 + 1) {
      ret.resize(i1 + 1, 0);
    }
    ret[i1] |= (1 << i2);
  }
  return ret;
}

874
875
template<typename T>
inline static bool FindInBitset(const uint32_t* bits, int n, T pos) {
876
877
878
879
880
881
882
883
  int i1 = pos / 32;
  if (i1 >= n) {
    return false;
  }
  int i2 = pos % 32;
  return (bits[i1] >> i2) & 1;
}

884
885
886
887
888
889
890
891
892
inline static bool CheckDoubleEqualOrdered(double a, double b) {
  double upper = std::nextafter(a, INFINITY);
  return b <= upper;
}

inline static double GetDoubleUpperBound(double a) {
  return std::nextafter(a, INFINITY);;
}

893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
inline static size_t GetLine(const char* str) {
  auto start = str;
  while (*str != '\0' && *str != '\n' && *str != '\r') {
    ++str;
  }
  return str - start;
}

inline static const char* SkipNewLine(const char* str) {
  if (*str == '\r') {
    ++str;
  }
  if (*str == '\n') {
    ++str;
  }
  return str;
}

911
912
913
914
915
template <typename T>
static int Sign(T x) {
  return (x > T(0)) - (x < T(0));
}

Guolin Ke's avatar
Guolin Ke committed
916
917
918
919
920
921
922
923
924
template <typename T>
static T SafeLog(T x) {
  if (x > 0) {
    return std::log(x);
  } else {
    return -INFINITY;
  }
}

925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
inline bool CheckAllowedJSON(const std::string& s) {
  unsigned char char_code;
  for (auto c : s) {
    char_code = static_cast<unsigned char>(c);
    if (char_code == 34      // "
        || char_code == 44   // ,
        || char_code == 58   // :
        || char_code == 91   // [
        || char_code == 93   // ]
        || char_code == 123  // {
        || char_code == 125  // }
        ) {
      return false;
    }
  }
  return true;
}

943
944
945
946
947
948
inline int RoundInt(double x) {
  return static_cast<int>(x + 0.5f);
}

template <typename T, std::size_t N = 32>
class AlignmentAllocator {
949
 public:
950
951
952
953
954
955
956
957
958
959
  typedef T value_type;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;

  typedef T* pointer;
  typedef const T* const_pointer;

  typedef T& reference;
  typedef const T& const_reference;

960
  inline AlignmentAllocator() throw() {}
961
962

  template <typename T2>
963
  inline AlignmentAllocator(const AlignmentAllocator<T2, N>&) throw() {}
964

965
  inline ~AlignmentAllocator() throw() {}
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990

  inline pointer adress(reference r) {
    return &r;
  }

  inline const_pointer adress(const_reference r) const {
    return &r;
  }

  inline pointer allocate(size_type n) {
    return (pointer)_mm_malloc(n * sizeof(value_type), N);
  }

  inline void deallocate(pointer p, size_type) {
    _mm_free(p);
  }

  inline void construct(pointer p, const value_type& wert) {
    new (p) value_type(wert);
  }

  inline void destroy(pointer p) {
    p->~value_type();
  }

991
  inline size_type max_size() const throw() {
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
    return size_type(-1) / sizeof(value_type);
  }

  template <typename T2>
  struct rebind {
    typedef AlignmentAllocator<T2, N> other;
  };

  bool operator!=(const AlignmentAllocator<T, N>& other) const {
    return !(*this == other);
  }

  // Returns true if and only if storage allocated from *this
  // can be deallocated from other, and vice versa.
  // Always returns true for stateless allocators.
  bool operator==(const AlignmentAllocator<T, N>&) const {
    return true;
  }
};

class Timer {
 public:
Guolin Ke's avatar
Guolin Ke committed
1014
1015
  Timer() {
#ifdef TIMETAG
1016
    int num_threads = OMP_NUM_THREADS();
Guolin Ke's avatar
Guolin Ke committed
1017
1018
1019
    start_time_.resize(num_threads);
    stats_.resize(num_threads);
#endif  // TIMETAG
1020
  }
1021

Guolin Ke's avatar
Guolin Ke committed
1022
1023
1024
  ~Timer() { Print(); }

#ifdef TIMETAG
1025
  void Start(const std::string& name) {
Guolin Ke's avatar
Guolin Ke committed
1026
1027
    auto tid = omp_get_thread_num();
    start_time_[tid][name] = std::chrono::steady_clock::now();
1028
  }
1029

1030
  void Stop(const std::string& name) {
Guolin Ke's avatar
Guolin Ke committed
1031
1032
1033
1034
    auto cur_time = std::chrono::steady_clock::now();
    auto tid = omp_get_thread_num();
    if (stats_[tid].find(name) == stats_[tid].end()) {
      stats_[tid][name] = std::chrono::duration<double, std::milli>(0);
1035
    }
Guolin Ke's avatar
Guolin Ke committed
1036
    stats_[tid][name] += cur_time - start_time_[tid][name];
1037
  }
1038

Guolin Ke's avatar
Guolin Ke committed
1039
1040
#else
  void Start(const std::string&) {}
1041

Guolin Ke's avatar
Guolin Ke committed
1042
1043
  void Stop(const std::string&) {}
#endif  // TIMETAG
1044
1045

  void Print() const {
Guolin Ke's avatar
Guolin Ke committed
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
#ifdef TIMETAG
    std::unordered_map<std::string, std::chrono::duration<double, std::milli>>
        stats(stats_[0].begin(), stats_[0].end());
    for (size_t i = 1; i < stats_.size(); ++i) {
      for (auto it = stats_[i].begin(); it != stats_[i].end(); ++it) {
        if (stats.find(it->first) == stats.end()) {
          stats[it->first] = it->second;
        } else {
          stats[it->first] += it->second;
        }
      }
    }
    std::map<std::string, std::chrono::duration<double, std::milli>> ordered(
        stats.begin(), stats.end());
1060
    for (auto it = ordered.begin(); it != ordered.end(); ++it) {
1061
      Log::Info("%s costs:\t %f", it->first.c_str(), it->second * 1e-3);
1062
    }
Guolin Ke's avatar
Guolin Ke committed
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
#endif  // TIMETAG
  }
#ifdef TIMETAG
  std::vector<
      std::unordered_map<std::string, std::chrono::steady_clock::time_point>>
      start_time_;
  std::vector<std::unordered_map<std::string,
                                 std::chrono::duration<double, std::milli>>>
      stats_;
#endif  // TIMETAG
1073
1074
1075
1076
1077
};

// Note: this class is not thread-safe, don't use it inside omp blocks
class FunctionTimer {
 public:
1078
#ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
1079
  FunctionTimer(const std::string& name, Timer& timer) : timer_(timer) {
1080
1081
1082
    timer.Start(name);
    name_ = name;
  }
1083

Guolin Ke's avatar
Guolin Ke committed
1084
  ~FunctionTimer() { timer_.Stop(name_); }
1085

1086
1087
1088
 private:
  std::string name_;
  Timer& timer_;
1089
1090
1091
#else
  FunctionTimer(const std::string&, Timer&) {}
#endif  // TIMETAG
1092
1093
};

Guolin Ke's avatar
Guolin Ke committed
1094
1095
}  // namespace Common

1096
1097
extern Common::Timer global_timer;

Guolin Ke's avatar
Guolin Ke committed
1098
1099
}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
1100
#endif   // LightGBM_UTILS_COMMON_FUN_H_