common.h 28.5 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

8
9
10
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>

11
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
12
#include <string>
13
#include <algorithm>
14
#include <chrono>
15
#include <cmath>
16
17
#include <cstdint>
#include <cstdio>
Guolin Ke's avatar
Guolin Ke committed
18
#include <functional>
19
#include <iomanip>
20
#include <iterator>
21
#include <map>
22
23
#include <memory>
#include <sstream>
Guolin Ke's avatar
Guolin Ke committed
24
#include <type_traits>
25
#include <unordered_map>
26
27
#include <utility>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
28

29
30
31
32
33
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#endif

34
#if defined(_MSC_VER)
35
36
37
38
39
40
41
42
43
44
45
#include <malloc.h>
#elif MM_MALLOC
#include <mm_malloc.h>
#elif defined(__GNUC__)
#include <malloc.h>
#define _mm_malloc(a, b) memalign(b, a)
#define _mm_free(a) free(a)
#else
#include <stdlib.h>
#define _mm_malloc(a, b) malloc(a)
#define _mm_free(a) free(a)
46
47
#endif

Guolin Ke's avatar
Guolin Ke committed
48
49
50
51
namespace LightGBM {

namespace Common {

52
inline static char tolower(char in) {
Guolin Ke's avatar
Guolin Ke committed
53
54
55
56
57
  if (in <= 'Z' && in >= 'A')
    return in - ('Z' - 'z');
  return in;
}

58
inline static std::string Trim(std::string str) {
Guolin Ke's avatar
Guolin Ke committed
59
  if (str.empty()) {
Guolin Ke's avatar
Guolin Ke committed
60
61
62
63
64
65
66
    return str;
  }
  str.erase(str.find_last_not_of(" \f\n\r\t\v") + 1);
  str.erase(0, str.find_first_not_of(" \f\n\r\t\v"));
  return str;
}

67
inline static std::string RemoveQuotationSymbol(std::string str) {
Guolin Ke's avatar
Guolin Ke committed
68
  if (str.empty()) {
69
70
71
72
73
74
    return str;
  }
  str.erase(str.find_last_not_of("'\"") + 1);
  str.erase(0, str.find_first_not_of("'\""));
  return str;
}
Guolin Ke's avatar
Guolin Ke committed
75

Guolin Ke's avatar
Guolin Ke committed
76
77
78
79
80
81
82
inline static bool StartsWith(const std::string& str, const std::string prefix) {
  if (str.substr(0, prefix.size()) == prefix) {
    return true;
  } else {
    return false;
  }
}
Guolin Ke's avatar
Guolin Ke committed
83

Guolin Ke's avatar
Guolin Ke committed
84
inline static std::vector<std::string> Split(const char* c_str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
85
  std::vector<std::string> ret;
Guolin Ke's avatar
Guolin Ke committed
86
87
  std::string str(c_str);
  size_t i = 0;
Guolin Ke's avatar
Guolin Ke committed
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
  size_t pos = 0;
  while (pos < str.length()) {
    if (str[pos] == delimiter) {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      ++pos;
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
  }
  return ret;
}

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
inline static std::vector<std::string> SplitBrackets(const char* c_str, char left_delimiter, char right_delimiter) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = 0;
  bool open = false;
  while (pos < str.length()) {
    if (str[pos] == left_delimiter) {
      open = true;
      ++pos;
      i = pos;
    } else if (str[pos] == right_delimiter && open) {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      open = false;
      ++pos;
    } else {
      ++pos;
    }
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
inline static std::vector<std::string> SplitLines(const char* c_str) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
  size_t pos = 0;
  while (pos < str.length()) {
    if (str[pos] == '\n' || str[pos] == '\r') {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      // skip the line endings
      while (str[pos] == '\n' || str[pos] == '\r') ++pos;
      // new begin
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
Guolin Ke's avatar
Guolin Ke committed
150
151
152
153
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
154
155
156
157
inline static std::vector<std::string> Split(const char* c_str, const char* delimiters) {
  std::vector<std::string> ret;
  std::string str(c_str);
  size_t i = 0;
Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
  size_t pos = 0;
  while (pos < str.length()) {
    bool met_delimiters = false;
    for (int j = 0; delimiters[j] != '\0'; ++j) {
      if (str[pos] == delimiters[j]) {
        met_delimiters = true;
        break;
      }
    }
    if (met_delimiters) {
      if (i < pos) {
        ret.push_back(str.substr(i, pos - i));
      }
      ++pos;
      i = pos;
    } else {
      ++pos;
    }
  }
  if (i < pos) {
    ret.push_back(str.substr(i));
Guolin Ke's avatar
Guolin Ke committed
179
180
181
182
  }
  return ret;
}

183
184
185
186
template<typename T>
inline static const char* Atoi(const char* p, T* out) {
  int sign;
  T value;
Guolin Ke's avatar
Guolin Ke committed
187
188
189
190
191
192
193
  while (*p == ' ') {
    ++p;
  }
  sign = 1;
  if (*p == '-') {
    sign = -1;
    ++p;
194
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
195
196
197
198
199
    ++p;
  }
  for (value = 0; *p >= '0' && *p <= '9'; ++p) {
    value = value * 10 + (*p - '0');
  }
200
  *out = static_cast<T>(sign * value);
Guolin Ke's avatar
Guolin Ke committed
201
202
203
204
205
206
  while (*p == ' ') {
    ++p;
  }
  return p;
}

207
template<typename T>
208
209
210
211
212
213
214
215
216
217
218
219
220
221
inline static double Pow(T base, int power) {
  if (power < 0) {
    return 1.0 / Pow(base, -power);
  } else if (power == 0) {
    return 1;
  } else if (power % 2 == 0) {
    return Pow(base*base, power / 2);
  } else if (power % 3 == 0) {
    return Pow(base*base*base, power / 3);
  } else {
    return base * Pow(base, power - 1);
  }
}

222
inline static const char* Atof(const char* p, double* out) {
Guolin Ke's avatar
Guolin Ke committed
223
  int frac;
224
  double sign, value, scale;
Guolin Ke's avatar
Guolin Ke committed
225
  *out = NAN;
Guolin Ke's avatar
Guolin Ke committed
226
227
228
229
230
  // Skip leading white space, if any.
  while (*p == ' ') {
    ++p;
  }
  // Get sign, if any.
231
  sign = 1.0;
Guolin Ke's avatar
Guolin Ke committed
232
  if (*p == '-') {
233
    sign = -1.0;
Guolin Ke's avatar
Guolin Ke committed
234
    ++p;
235
  } else if (*p == '+') {
Guolin Ke's avatar
Guolin Ke committed
236
237
238
    ++p;
  }

Guolin Ke's avatar
Guolin Ke committed
239
240
241
  // is a number
  if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
    // Get digits before decimal point or exponent, if any.
242
243
    for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
      value = value * 10.0 + (*p - '0');
Guolin Ke's avatar
Guolin Ke committed
244
    }
Guolin Ke's avatar
Guolin Ke committed
245

Guolin Ke's avatar
Guolin Ke committed
246
247
    // Get digits after decimal point, if any.
    if (*p == '.') {
248
249
      double right = 0.0;
      int nn = 0;
Guolin Ke's avatar
Guolin Ke committed
250
      ++p;
Guolin Ke's avatar
Guolin Ke committed
251
      while (*p >= '0' && *p <= '9') {
252
253
        right = (*p - '0') + right * 10.0;
        ++nn;
Guolin Ke's avatar
Guolin Ke committed
254
255
        ++p;
      }
256
      value += right / Pow(10.0, nn);
Guolin Ke's avatar
Guolin Ke committed
257
258
    }

Guolin Ke's avatar
Guolin Ke committed
259
260
    // Handle exponent, if any.
    frac = 0;
261
    scale = 1.0;
Guolin Ke's avatar
Guolin Ke committed
262
    if ((*p == 'e') || (*p == 'E')) {
Guolin Ke's avatar
Guolin Ke committed
263
      uint32_t expon;
Guolin Ke's avatar
Guolin Ke committed
264
      // Get sign of exponent, if any.
Guolin Ke's avatar
Guolin Ke committed
265
      ++p;
Guolin Ke's avatar
Guolin Ke committed
266
267
268
269
270
271
272
273
274
275
      if (*p == '-') {
        frac = 1;
        ++p;
      } else if (*p == '+') {
        ++p;
      }
      // Get digits of exponent, if any.
      for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
        expon = expon * 10 + (*p - '0');
      }
276
277
278
      if (expon > 308) expon = 308;
      // Calculate scaling factor.
      while (expon >= 50) { scale *= 1E50; expon -= 50; }
Guolin Ke's avatar
Guolin Ke committed
279
      while (expon >= 8) { scale *= 1E8;  expon -= 8; }
280
      while (expon > 0) { scale *= 10.0; expon -= 1; }
Guolin Ke's avatar
Guolin Ke committed
281
    }
Guolin Ke's avatar
Guolin Ke committed
282
283
284
    // Return signed and scaled floating point result.
    *out = sign * (frac ? (value / scale) : (value * scale));
  } else {
285
    size_t cnt = 0;
286
    while (*(p + cnt) != '\0' && *(p + cnt) != ' '
287
288
289
           && *(p + cnt) != '\t' && *(p + cnt) != ','
           && *(p + cnt) != '\n' && *(p + cnt) != '\r'
           && *(p + cnt) != ':') {
290
291
      ++cnt;
    }
292
    if (cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
293
      std::string tmp_str(p, cnt);
Guolin Ke's avatar
Guolin Ke committed
294
      std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), Common::tolower);
zhangjin's avatar
zhangjin committed
295
296
      if (tmp_str == std::string("na") || tmp_str == std::string("nan") ||
          tmp_str == std::string("null")) {
Guolin Ke's avatar
Guolin Ke committed
297
        *out = NAN;
298
      } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
299
        *out = sign * 1e308;
300
      } else {
301
        Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
Guolin Ke's avatar
Guolin Ke committed
302
303
      }
      p += cnt;
304
    }
Guolin Ke's avatar
Guolin Ke committed
305
  }
Guolin Ke's avatar
Guolin Ke committed
306

Guolin Ke's avatar
Guolin Ke committed
307
308
309
  while (*p == ' ') {
    ++p;
  }
Guolin Ke's avatar
Guolin Ke committed
310

Guolin Ke's avatar
Guolin Ke committed
311
312
313
  return p;
}

314
inline static bool AtoiAndCheck(const char* p, int* out) {
315
316
317
318
319
320
321
  const char* after = Atoi(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

322
inline static bool AtofAndCheck(const char* p, double* out) {
323
324
325
326
327
328
329
  const char* after = Atof(p, out);
  if (*after != '\0') {
    return false;
  }
  return true;
}

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
inline static unsigned CountDecimalDigit32(uint32_t n) {
#if defined(_MSC_VER) || defined(__GNUC__)
  static const uint32_t powers_of_10[] = {
    0,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000
  };
#ifdef _MSC_VER
Guolin Ke's avatar
Guolin Ke committed
345
  // NOLINTNEXTLINE
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
  unsigned long i = 0;
  _BitScanReverse(&i, n | 1);
  uint32_t t = (i + 1) * 1233 >> 12;
#elif __GNUC__
  uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12;
#endif
  return t - (n < powers_of_10[t]) + 1;
#else
  if (n < 10) return 1;
  if (n < 100) return 2;
  if (n < 1000) return 3;
  if (n < 10000) return 4;
  if (n < 100000) return 5;
  if (n < 1000000) return 6;
  if (n < 10000000) return 7;
  if (n < 100000000) return 8;
  if (n < 1000000000) return 9;
  return 10;
#endif
}

inline static void Uint32ToStr(uint32_t value, char* buffer) {
  const char kDigitsLut[200] = {
369
370
371
372
373
374
375
376
377
378
    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
    '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
    '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
    '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
    '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
    '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
    '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
    '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
    '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
    '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
379
380
381
382
383
384
385
386
387
388
389
390
391
  };
  unsigned digit = CountDecimalDigit32(value);
  buffer += digit;
  *buffer = '\0';

  while (value >= 100) {
    const unsigned i = (value % 100) << 1;
    value /= 100;
    *--buffer = kDigitsLut[i + 1];
    *--buffer = kDigitsLut[i];
  }

  if (value < 10) {
392
    *--buffer = static_cast<char>(value) + '0';
393
  } else {
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
    const unsigned i = value << 1;
    *--buffer = kDigitsLut[i + 1];
    *--buffer = kDigitsLut[i];
  }
}

inline static void Int32ToStr(int32_t value, char* buffer) {
  uint32_t u = static_cast<uint32_t>(value);
  if (value < 0) {
    *buffer++ = '-';
    u = ~u + 1;
  }
  Uint32ToStr(u, buffer);
}

409
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
410
  #ifdef _MSC_VER
411
  int num_chars = sprintf_s(buffer, buffer_len, "%.17g", value);
412
  #else
413
  int num_chars = snprintf(buffer, buffer_len, "%.17g", value);
414
  #endif
415
  CHECK_GE(num_chars, 0);
416
417
}

Guolin Ke's avatar
Guolin Ke committed
418
419
420
421
422
423
424
425
426
427
428
429
430
431
inline static const char* SkipSpaceAndTab(const char* p) {
  while (*p == ' ' || *p == '\t') {
    ++p;
  }
  return p;
}

inline static const char* SkipReturn(const char* p) {
  while (*p == '\n' || *p == '\r' || *p == ' ') {
    ++p;
  }
  return p;
}

Guolin Ke's avatar
Guolin Ke committed
432
433
template<typename T, typename T2>
inline static std::vector<T2> ArrayCast(const std::vector<T>& arr) {
434
  std::vector<T2> ret(arr.size());
Guolin Ke's avatar
Guolin Ke committed
435
  for (size_t i = 0; i < arr.size(); ++i) {
436
    ret[i] = static_cast<T2>(arr[i]);
Guolin Ke's avatar
Guolin Ke committed
437
  }
Guolin Ke's avatar
Guolin Ke committed
438
  return ret;
Guolin Ke's avatar
Guolin Ke committed
439
440
}

441
442
template<typename T, bool is_float, bool is_unsign>
struct __TToStringHelperFast {
443
  void operator()(T value, char* buffer, size_t) const {
444
445
446
447
448
449
    Int32ToStr(value, buffer);
  }
};

template<typename T>
struct __TToStringHelperFast<T, true, false> {
450
  void operator()(T value, char* buffer, size_t buf_len)
451
  const {
452
    #ifdef _MSC_VER
453
    int num_chars = sprintf_s(buffer, buf_len, "%g", value);
454
    #else
455
    int num_chars = snprintf(buffer, buf_len, "%g", value);
456
    #endif
457
    CHECK_GE(num_chars, 0);
458
459
460
461
462
  }
};

template<typename T>
struct __TToStringHelperFast<T, false, true> {
463
  void operator()(T value, char* buffer, size_t) const {
464
465
466
467
    Uint32ToStr(value, buffer);
  }
};

468
template<typename T>
469
470
inline static std::string ArrayToStringFast(const std::vector<T>& arr, size_t n) {
  if (arr.empty() || n == 0) {
471
    return std::string("");
Guolin Ke's avatar
Guolin Ke committed
472
  }
473
474
475
  __TToStringHelperFast<T, std::is_floating_point<T>::value, std::is_unsigned<T>::value> helper;
  const size_t buf_len = 16;
  std::vector<char> buffer(buf_len);
476
  std::stringstream str_buf;
477
478
479
480
481
  helper(arr[0], buffer.data(), buf_len);
  str_buf << buffer.data();
  for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
    helper(arr[i], buffer.data(), buf_len);
    str_buf << ' ' << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
482
  }
483
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
484
485
}

486
inline static std::string ArrayToString(const std::vector<double>& arr, size_t n) {
Guolin Ke's avatar
Guolin Ke committed
487
488
489
  if (arr.empty() || n == 0) {
    return std::string("");
  }
490
491
  const size_t buf_len = 32;
  std::vector<char> buffer(buf_len);
Guolin Ke's avatar
Guolin Ke committed
492
  std::stringstream str_buf;
493
494
  DoubleToStr(arr[0], buffer.data(), buf_len);
  str_buf << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
495
  for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
496
497
    DoubleToStr(arr[i], buffer.data(), buf_len);
    str_buf << ' ' << buffer.data();
Guolin Ke's avatar
Guolin Ke committed
498
499
500
501
  }
  return str_buf.str();
}

502
503
504
template<typename T, bool is_float>
struct __StringToTHelper {
  T operator()(const std::string& str) const {
505
506
507
    T ret = 0;
    Atoi(str.c_str(), &ret);
    return ret;
508
509
510
511
512
513
514
515
516
517
  }
};

template<typename T>
struct __StringToTHelper<T, true> {
  T operator()(const std::string& str) const {
    return static_cast<T>(std::stod(str));
  }
};

Guolin Ke's avatar
Guolin Ke committed
518
template<typename T>
519
inline static std::vector<T> StringToArray(const std::string& str, char delimiter) {
Guolin Ke's avatar
Guolin Ke committed
520
  std::vector<std::string> strs = Split(str.c_str(), delimiter);
521
522
  std::vector<T> ret;
  ret.reserve(strs.size());
523
  __StringToTHelper<T, std::is_floating_point<T>::value> helper;
524
525
  for (const auto& s : strs) {
    ret.push_back(helper(s));
Guolin Ke's avatar
Guolin Ke committed
526
527
528
529
  }
  return ret;
}

530
531
532
533
534
535
536
537
538
539
540
template<typename T>
inline static std::vector<std::vector<T>> StringToArrayofArrays(
    const std::string& str, char left_bracket, char right_bracket, char delimiter) {
  std::vector<std::string> strs = SplitBrackets(str.c_str(), left_bracket, right_bracket);
  std::vector<std::vector<T>> ret;
  for (const auto& s : strs) {
    ret.push_back(StringToArray<T>(s, delimiter));
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
541
template<typename T>
542
543
544
545
546
inline static std::vector<T> StringToArray(const std::string& str, int n) {
  if (n == 0) {
    return std::vector<T>();
  }
  std::vector<std::string> strs = Split(str.c_str(), ' ');
Nikita Titov's avatar
Nikita Titov committed
547
  CHECK_EQ(strs.size(), static_cast<size_t>(n));
Guolin Ke's avatar
Guolin Ke committed
548
  std::vector<T> ret;
549
550
551
552
  ret.reserve(strs.size());
  __StringToTHelper<T, std::is_floating_point<T>::value> helper;
  for (const auto& s : strs) {
    ret.push_back(helper(s));
Guolin Ke's avatar
Guolin Ke committed
553
554
555
556
  }
  return ret;
}

557
558
559
560
561
562
563
564
565
566
567
568
template<typename T, bool is_float>
struct __StringToTHelperFast {
  const char* operator()(const char*p, T* out) const {
    return Atoi(p, out);
  }
};

template<typename T>
struct __StringToTHelperFast<T, true> {
  const char* operator()(const char*p, T* out) const {
    double tmp = 0.0f;
    auto ret = Atof(p, &tmp);
569
    *out = static_cast<T>(tmp);
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
    return ret;
  }
};

template<typename T>
inline static std::vector<T> StringToArrayFast(const std::string& str, int n) {
  if (n == 0) {
    return std::vector<T>();
  }
  auto p_str = str.c_str();
  __StringToTHelperFast<T, std::is_floating_point<T>::value> helper;
  std::vector<T> ret(n);
  for (int i = 0; i < n; ++i) {
    p_str = helper(p_str, &ret[i]);
  }
  return ret;
}

588
template<typename T>
Guolin Ke's avatar
Guolin Ke committed
589
inline static std::string Join(const std::vector<T>& strs, const char* delimiter) {
Guolin Ke's avatar
Guolin Ke committed
590
  if (strs.empty()) {
Guolin Ke's avatar
Guolin Ke committed
591
592
    return std::string("");
  }
593
  std::stringstream str_buf;
594
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
595
  str_buf << strs[0];
Guolin Ke's avatar
Guolin Ke committed
596
  for (size_t i = 1; i < strs.size(); ++i) {
597
598
    str_buf << delimiter;
    str_buf << strs[i];
Guolin Ke's avatar
Guolin Ke committed
599
  }
600
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
601
602
}

603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
template<>
inline std::string Join<int8_t>(const std::vector<int8_t>& strs, const char* delimiter) {
  if (strs.empty()) {
    return std::string("");
  }
  std::stringstream str_buf;
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  str_buf << static_cast<int16_t>(strs[0]);
  for (size_t i = 1; i < strs.size(); ++i) {
    str_buf << delimiter;
    str_buf << static_cast<int16_t>(strs[i]);
  }
  return str_buf.str();
}

618
template<typename T>
Guolin Ke's avatar
Guolin Ke committed
619
inline static std::string Join(const std::vector<T>& strs, size_t start, size_t end, const char* delimiter) {
Guolin Ke's avatar
Guolin Ke committed
620
621
622
  if (end - start <= 0) {
    return std::string("");
  }
Guolin Ke's avatar
Guolin Ke committed
623
624
  start = std::min(start, static_cast<size_t>(strs.size()) - 1);
  end = std::min(end, static_cast<size_t>(strs.size()));
625
  std::stringstream str_buf;
626
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
627
  str_buf << strs[start];
Guolin Ke's avatar
Guolin Ke committed
628
  for (size_t i = start + 1; i < end; ++i) {
629
630
    str_buf << delimiter;
    str_buf << strs[i];
Guolin Ke's avatar
Guolin Ke committed
631
  }
632
  return str_buf.str();
Guolin Ke's avatar
Guolin Ke committed
633
634
}

635
inline static int64_t Pow2RoundUp(int64_t x) {
Guolin Ke's avatar
Guolin Ke committed
636
637
638
639
640
641
642
643
644
645
  int64_t t = 1;
  for (int i = 0; i < 64; ++i) {
    if (t >= x) {
      return t;
    }
    t <<= 1;
  }
  return 0;
}

646
/*!
647
 * \brief Do inplace softmax transformation on p_rec
648
649
 * \param p_rec The input/output vector of the values.
 */
650
inline static void Softmax(std::vector<double>* p_rec) {
651
652
  std::vector<double> &rec = *p_rec;
  double wmax = rec[0];
653
654
655
  for (size_t i = 1; i < rec.size(); ++i) {
    wmax = std::max(rec[i], wmax);
  }
656
  double wsum = 0.0f;
657
658
659
660
661
  for (size_t i = 0; i < rec.size(); ++i) {
    rec[i] = std::exp(rec[i] - wmax);
    wsum += rec[i];
  }
  for (size_t i = 0; i < rec.size(); ++i) {
662
    rec[i] /= static_cast<double>(wsum);
663
664
665
  }
}

666
inline static void Softmax(const double* input, double* output, int len) {
Guolin Ke's avatar
Guolin Ke committed
667
  double wmax = input[0];
668
  for (int i = 1; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
669
    wmax = std::max(input[i], wmax);
670
671
672
  }
  double wsum = 0.0f;
  for (int i = 0; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
673
674
    output[i] = std::exp(input[i] - wmax);
    wsum += output[i];
675
676
  }
  for (int i = 0; i < len; ++i) {
Guolin Ke's avatar
Guolin Ke committed
677
    output[i] /= static_cast<double>(wsum);
678
679
680
  }
}

Guolin Ke's avatar
Guolin Ke committed
681
682
683
template<typename T>
std::vector<const T*> ConstPtrInVectorWrapper(const std::vector<std::unique_ptr<T>>& input) {
  std::vector<const T*> ret;
Guolin Ke's avatar
Guolin Ke committed
684
685
  for (auto t = input.begin(); t !=input.end(); ++t) {
    ret.push_back(t->get());
686
  }
Guolin Ke's avatar
Guolin Ke committed
687
  return ret;
688
689
}

Guolin Ke's avatar
Guolin Ke committed
690
template<typename T1, typename T2>
Guolin Ke's avatar
Guolin Ke committed
691
inline static void SortForPair(std::vector<T1>* keys, std::vector<T2>* values, size_t start, bool is_reverse = false) {
Guolin Ke's avatar
Guolin Ke committed
692
  std::vector<std::pair<T1, T2>> arr;
Guolin Ke's avatar
Guolin Ke committed
693
694
  auto& ref_key = *keys;
  auto& ref_value = *values;
Guolin Ke's avatar
Guolin Ke committed
695
  for (size_t i = start; i < keys->size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
696
    arr.emplace_back(ref_key[i], ref_value[i]);
Guolin Ke's avatar
Guolin Ke committed
697
698
  }
  if (!is_reverse) {
699
    std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
Guolin Ke's avatar
Guolin Ke committed
700
701
702
      return a.first < b.first;
    });
  } else {
703
    std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
Guolin Ke's avatar
Guolin Ke committed
704
705
706
707
      return a.first > b.first;
    });
  }
  for (size_t i = start; i < arr.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
708
709
    ref_key[i] = arr[i].first;
    ref_value[i] = arr[i].second;
Guolin Ke's avatar
Guolin Ke committed
710
711
712
  }
}

713
template <typename T>
Guolin Ke's avatar
Guolin Ke committed
714
715
inline static std::vector<T*> Vector2Ptr(std::vector<std::vector<T>>* data) {
  std::vector<T*> ptr(data->size());
Guolin Ke's avatar
Guolin Ke committed
716
  auto& ref_data = *data;
Guolin Ke's avatar
Guolin Ke committed
717
  for (size_t i = 0; i < data->size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
718
    ptr[i] = ref_data[i].data();
719
720
721
722
723
724
725
726
727
728
729
730
731
  }
  return ptr;
}

template <typename T>
inline static std::vector<int> VectorSize(const std::vector<std::vector<T>>& data) {
  std::vector<int> ret(data.size());
  for (size_t i = 0; i < data.size(); ++i) {
    ret[i] = static_cast<int>(data[i].size());
  }
  return ret;
}

Guolin Ke's avatar
Guolin Ke committed
732
inline static double AvoidInf(double x) {
Guolin Ke's avatar
Guolin Ke committed
733
734
735
  if (std::isnan(x)) {
    return 0.0;
  } else if (x >= 1e300) {
Guolin Ke's avatar
Guolin Ke committed
736
    return 1e300;
737
  } else if (x <= -1e300) {
Guolin Ke's avatar
Guolin Ke committed
738
    return -1e300;
Guolin Ke's avatar
Guolin Ke committed
739
740
741
742
743
  } else {
    return x;
  }
}

744
inline static float AvoidInf(float x) {
Guolin Ke's avatar
Guolin Ke committed
745
  if (std::isnan(x)) {
Guolin Ke's avatar
Guolin Ke committed
746
747
    return 0.0f;
  } else if (x >= 1e38) {
748
749
750
751
752
753
    return 1e38f;
  } else if (x <= -1e38) {
    return -1e38f;
  } else {
    return x;
  }
754
755
756
}

template<typename _Iter> inline
757
758
759
760
static typename std::iterator_traits<_Iter>::value_type* IteratorValType(_Iter) {
  return (0);
}

761
template<typename _RanIt, typename _Pr, typename _VTRanIt> inline
762
763
764
static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
  size_t len = _Last - _First;
  const size_t kMinInnerLen = 1024;
765
  int num_threads = OMP_NUM_THREADS();
766
767
768
769
770
771
772
  if (len <= kMinInnerLen || num_threads <= 1) {
    std::sort(_First, _Last, _Pred);
    return;
  }
  size_t inner_size = (len + num_threads - 1) / num_threads;
  inner_size = std::max(inner_size, kMinInnerLen);
  num_threads = static_cast<int>((len + inner_size - 1) / inner_size);
773
#pragma omp parallel for schedule(static, 1)
774
775
776
777
778
779
780
781
782
783
784
  for (int i = 0; i < num_threads; ++i) {
    size_t left = inner_size*i;
    size_t right = left + inner_size;
    right = std::min(right, len);
    if (right > left) {
      std::sort(_First + left, _First + right, _Pred);
    }
  }
  // Buffer for merge.
  std::vector<_VTRanIt> temp_buf(len);
  _RanIt buf = temp_buf.begin();
785
  size_t s = inner_size;
786
787
788
  // Recursive merge
  while (s < len) {
    int loop_size = static_cast<int>((len + s * 2 - 1) / (s * 2));
789
    #pragma omp parallel for schedule(static, 1)
790
791
792
793
794
    for (int i = 0; i < loop_size; ++i) {
      size_t left = i * 2 * s;
      size_t mid = left + s;
      size_t right = mid + s;
      right = std::min(len, right);
Guolin Ke's avatar
Guolin Ke committed
795
      if (mid >= right) { continue; }
796
797
798
799
800
801
802
      std::copy(_First + left, _First + mid, buf + left);
      std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred);
    }
    s *= 2;
  }
}

803
template<typename _RanIt, typename _Pr> inline
804
805
806
807
static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred) {
  return ParallelSort(_First, _Last, _Pred, IteratorValType(_First));
}

808
// Check that all y[] are in interval [ymin, ymax] (end points included); throws error if not
809
template <typename T>
810
inline static void CheckElementsIntervalClosed(const T *y, T ymin, T ymax, int ny, const char *callername) {
811
  auto fatal_msg = [&y, &ymin, &ymax, &callername](int i) {
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
    std::ostringstream os;
    os << "[%s]: does not tolerate element [#%i = " << y[i] << "] outside [" << ymin << ", " << ymax << "]";
    Log::Fatal(os.str().c_str(), callername, i);
  };
  for (int i = 1; i < ny; i += 2) {
    if (y[i - 1] < y[i]) {
      if (y[i - 1] < ymin) {
        fatal_msg(i - 1);
      } else if (y[i] > ymax) {
        fatal_msg(i);
      }
    } else {
      if (y[i - 1] > ymax) {
        fatal_msg(i - 1);
      } else if (y[i] < ymin) {
        fatal_msg(i);
      }
    }
  }
831
  if (ny & 1) {  // odd
832
833
    if (y[ny - 1] < ymin || y[ny - 1] > ymax) {
      fatal_msg(ny - 1);
834
835
836
837
838
839
    }
  }
}

// One-pass scan over array w with nw elements: find min, max and sum of elements;
// this is useful for checking weight requirements.
840
template <typename T1, typename T2>
841
inline static void ObtainMinMaxSum(const T1 *w, int nw, T1 *mi, T1 *ma, T2 *su) {
842
843
844
845
  T1 minw;
  T1 maxw;
  T1 sumw;
  int i;
846
  if (nw & 1) {  // odd
847
848
849
850
    minw = w[0];
    maxw = w[0];
    sumw = w[0];
    i = 2;
851
  } else {  // even
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
    if (w[0] < w[1]) {
      minw = w[0];
      maxw = w[1];
    } else {
      minw = w[1];
      maxw = w[0];
    }
    sumw = w[0] + w[1];
    i = 3;
  }
  for (; i < nw; i += 2) {
    if (w[i - 1] < w[i]) {
      minw = std::min(minw, w[i - 1]);
      maxw = std::max(maxw, w[i]);
    } else {
      minw = std::min(minw, w[i]);
      maxw = std::max(maxw, w[i - 1]);
    }
    sumw += w[i - 1] + w[i];
  }
  if (mi != nullptr) {
    *mi = minw;
  }
  if (ma != nullptr) {
    *ma = maxw;
  }
  if (su != nullptr) {
    *su = static_cast<T2>(sumw);
  }
881
882
}

883
inline static std::vector<uint32_t> EmptyBitset(int n) {
884
  int size = n / 32;
885
  if (n % 32 != 0) ++size;
886
887
888
889
  return std::vector<uint32_t>(size);
}

template<typename T>
Guolin Ke's avatar
Guolin Ke committed
890
inline static void InsertBitset(std::vector<uint32_t>* vec, const T val) {
Guolin Ke's avatar
Guolin Ke committed
891
892
893
894
895
896
897
  auto& ref_v = *vec;
  int i1 = val / 32;
  int i2 = val % 32;
  if (static_cast<int>(vec->size()) < i1 + 1) {
    vec->resize(i1 + 1, 0);
  }
  ref_v[i1] |= (1 << i2);
898
899
}

900
901
template<typename T>
inline static std::vector<uint32_t> ConstructBitset(const T* vals, int n) {
902
903
904
905
906
907
908
909
910
911
912
913
  std::vector<uint32_t> ret;
  for (int i = 0; i < n; ++i) {
    int i1 = vals[i] / 32;
    int i2 = vals[i] % 32;
    if (static_cast<int>(ret.size()) < i1 + 1) {
      ret.resize(i1 + 1, 0);
    }
    ret[i1] |= (1 << i2);
  }
  return ret;
}

914
915
template<typename T>
inline static bool FindInBitset(const uint32_t* bits, int n, T pos) {
916
917
918
919
920
921
922
923
  int i1 = pos / 32;
  if (i1 >= n) {
    return false;
  }
  int i2 = pos % 32;
  return (bits[i1] >> i2) & 1;
}

924
925
926
927
928
929
inline static bool CheckDoubleEqualOrdered(double a, double b) {
  double upper = std::nextafter(a, INFINITY);
  return b <= upper;
}

inline static double GetDoubleUpperBound(double a) {
Hongbin Shi's avatar
Hongbin Shi committed
930
  return std::nextafter(a, INFINITY);
931
932
}

933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
inline static size_t GetLine(const char* str) {
  auto start = str;
  while (*str != '\0' && *str != '\n' && *str != '\r') {
    ++str;
  }
  return str - start;
}

inline static const char* SkipNewLine(const char* str) {
  if (*str == '\r') {
    ++str;
  }
  if (*str == '\n') {
    ++str;
  }
  return str;
}

951
952
953
954
955
template <typename T>
static int Sign(T x) {
  return (x > T(0)) - (x < T(0));
}

Guolin Ke's avatar
Guolin Ke committed
956
957
958
959
960
961
962
963
964
template <typename T>
static T SafeLog(T x) {
  if (x > 0) {
    return std::log(x);
  } else {
    return -INFINITY;
  }
}

965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
inline bool CheckAllowedJSON(const std::string& s) {
  unsigned char char_code;
  for (auto c : s) {
    char_code = static_cast<unsigned char>(c);
    if (char_code == 34      // "
        || char_code == 44   // ,
        || char_code == 58   // :
        || char_code == 91   // [
        || char_code == 93   // ]
        || char_code == 123  // {
        || char_code == 125  // }
        ) {
      return false;
    }
  }
  return true;
}

983
984
985
986
987
988
inline int RoundInt(double x) {
  return static_cast<int>(x + 0.5f);
}

template <typename T, std::size_t N = 32>
class AlignmentAllocator {
989
 public:
990
991
992
993
994
995
996
997
998
999
  typedef T value_type;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;

  typedef T* pointer;
  typedef const T* const_pointer;

  typedef T& reference;
  typedef const T& const_reference;

1000
  inline AlignmentAllocator() throw() {}
1001
1002

  template <typename T2>
1003
  inline AlignmentAllocator(const AlignmentAllocator<T2, N>&) throw() {}
1004

1005
  inline ~AlignmentAllocator() throw() {}
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030

  inline pointer adress(reference r) {
    return &r;
  }

  inline const_pointer adress(const_reference r) const {
    return &r;
  }

  inline pointer allocate(size_type n) {
    return (pointer)_mm_malloc(n * sizeof(value_type), N);
  }

  inline void deallocate(pointer p, size_type) {
    _mm_free(p);
  }

  inline void construct(pointer p, const value_type& wert) {
    new (p) value_type(wert);
  }

  inline void destroy(pointer p) {
    p->~value_type();
  }

1031
  inline size_type max_size() const throw() {
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
    return size_type(-1) / sizeof(value_type);
  }

  template <typename T2>
  struct rebind {
    typedef AlignmentAllocator<T2, N> other;
  };

  bool operator!=(const AlignmentAllocator<T, N>& other) const {
    return !(*this == other);
  }

  // Returns true if and only if storage allocated from *this
  // can be deallocated from other, and vice versa.
  // Always returns true for stateless allocators.
  bool operator==(const AlignmentAllocator<T, N>&) const {
    return true;
  }
};

class Timer {
 public:
Guolin Ke's avatar
Guolin Ke committed
1054
1055
  Timer() {
#ifdef TIMETAG
1056
    int num_threads = OMP_NUM_THREADS();
Guolin Ke's avatar
Guolin Ke committed
1057
1058
1059
    start_time_.resize(num_threads);
    stats_.resize(num_threads);
#endif  // TIMETAG
1060
  }
1061

Guolin Ke's avatar
Guolin Ke committed
1062
1063
1064
  ~Timer() { Print(); }

#ifdef TIMETAG
1065
  void Start(const std::string& name) {
Guolin Ke's avatar
Guolin Ke committed
1066
1067
    auto tid = omp_get_thread_num();
    start_time_[tid][name] = std::chrono::steady_clock::now();
1068
  }
1069

1070
  void Stop(const std::string& name) {
Guolin Ke's avatar
Guolin Ke committed
1071
1072
1073
1074
    auto cur_time = std::chrono::steady_clock::now();
    auto tid = omp_get_thread_num();
    if (stats_[tid].find(name) == stats_[tid].end()) {
      stats_[tid][name] = std::chrono::duration<double, std::milli>(0);
1075
    }
Guolin Ke's avatar
Guolin Ke committed
1076
    stats_[tid][name] += cur_time - start_time_[tid][name];
1077
  }
1078

Guolin Ke's avatar
Guolin Ke committed
1079
1080
#else
  void Start(const std::string&) {}
1081

Guolin Ke's avatar
Guolin Ke committed
1082
1083
  void Stop(const std::string&) {}
#endif  // TIMETAG
1084
1085

  void Print() const {
Guolin Ke's avatar
Guolin Ke committed
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
#ifdef TIMETAG
    std::unordered_map<std::string, std::chrono::duration<double, std::milli>>
        stats(stats_[0].begin(), stats_[0].end());
    for (size_t i = 1; i < stats_.size(); ++i) {
      for (auto it = stats_[i].begin(); it != stats_[i].end(); ++it) {
        if (stats.find(it->first) == stats.end()) {
          stats[it->first] = it->second;
        } else {
          stats[it->first] += it->second;
        }
      }
    }
    std::map<std::string, std::chrono::duration<double, std::milli>> ordered(
        stats.begin(), stats.end());
1100
    for (auto it = ordered.begin(); it != ordered.end(); ++it) {
1101
      Log::Info("%s costs:\t %f", it->first.c_str(), it->second * 1e-3);
1102
    }
Guolin Ke's avatar
Guolin Ke committed
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
#endif  // TIMETAG
  }
#ifdef TIMETAG
  std::vector<
      std::unordered_map<std::string, std::chrono::steady_clock::time_point>>
      start_time_;
  std::vector<std::unordered_map<std::string,
                                 std::chrono::duration<double, std::milli>>>
      stats_;
#endif  // TIMETAG
1113
1114
1115
1116
1117
};

// Note: this class is not thread-safe, don't use it inside omp blocks
class FunctionTimer {
 public:
1118
#ifdef TIMETAG
Guolin Ke's avatar
Guolin Ke committed
1119
  FunctionTimer(const std::string& name, Timer& timer) : timer_(timer) {
1120
1121
1122
    timer.Start(name);
    name_ = name;
  }
1123

Guolin Ke's avatar
Guolin Ke committed
1124
  ~FunctionTimer() { timer_.Stop(name_); }
1125

1126
1127
1128
 private:
  std::string name_;
  Timer& timer_;
1129
1130
1131
#else
  FunctionTimer(const std::string&, Timer&) {}
#endif  // TIMETAG
1132
1133
};

Guolin Ke's avatar
Guolin Ke committed
1134
1135
}  // namespace Common

1136
1137
extern Common::Timer global_timer;

Guolin Ke's avatar
Guolin Ke committed
1138
1139
}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
1140
#endif   // LightGBM_UTILS_COMMON_FUN_H_