lightgbm_R.cpp 24.8 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
5
6

#include "lightgbm_R.h"
Guolin Ke's avatar
Guolin Ke committed
7

8
9
10
11
12
13
14
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/text_reader.h>

#include <R_ext/Rdynload.h>

15
16
17
18
#define R_NO_REMAP
#define R_USE_C99_IN_CXX
#include <R_ext/Error.h>

19
20
21
22
23
24
25
#include <string>
#include <cstdio>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

Guolin Ke's avatar
Guolin Ke committed
26
27
28
29
30
#define COL_MAJOR (0)

/*!
 * \brief Open the try-block that wraps the body of every exported wrapper.
 */
#define R_API_BEGIN() \
  try {
/*!
 * \brief Close the try-block opened by R_API_BEGIN().
 * Any C++ exception is recorded through LGBM_SetLastError() and the enclosing
 * function returns R_NilValue; the normal fall-through path also returns R_NilValue.
 * Exceptions are caught by const reference because the handlers only read them.
 */
#define R_API_END() } \
  catch(const std::exception& ex) { LGBM_SetLastError(ex.what()); return R_NilValue; } \
  catch(const std::string& ex) { LGBM_SetLastError(ex.c_str()); return R_NilValue; } \
  catch(...) { LGBM_SetLastError("unknown exception"); return R_NilValue; } \
  return R_NilValue;
Guolin Ke's avatar
Guolin Ke committed
35
36
37

/*!
 * \brief Evaluate a LightGBM C API call and raise an R error when it returns non-zero.
 * The error text is passed as the argument of a "%s" format rather than as the
 * format itself, so '%' characters in the message are never interpreted as
 * conversion specifiers.
 * The trailing return keeps a value-returning path visible to the compiler.
 */
#define CHECK_CALL(x) \
  if ((x) != 0) { \
    Rf_error("%s", LGBM_GetLastError()); \
    return R_NilValue; \
  }

42
43
using LightGBM::Common::Split;
using LightGBM::Log;
Guolin Ke's avatar
Guolin Ke committed
44

45
46
47
48
/*!
 * \brief Report whether an external pointer currently stores a null address.
 * \param handle External pointer to inspect
 * \return Length-one logical vector, TRUE when the stored address is null
 */
SEXP LGBM_HandleIsNull_R(SEXP handle) {
  const bool is_null = (R_ExternalPtrAddr(handle) == nullptr);
  return Rf_ScalarLogical(is_null);
}

49
50
/*!
 * \brief Create a Dataset from a file and wrap the new handle in an external pointer.
 * \param filename Path of the data file (first element is used)
 * \param parameters Parameter string forwarded to LGBM_DatasetCreateFromFile
 * \param reference External pointer to a reference Dataset, or R NULL for none
 * \return External pointer holding the new Dataset handle
 */
SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
  SEXP parameters,
  SEXP reference) {
  R_API_BEGIN();
  // an R NULL reference is forwarded as a null handle
  DatasetHandle ref_dataset = Rf_isNull(reference) ? nullptr : R_ExternalPtrAddr(reference);
  const char* path = CHAR(Rf_asChar(filename));
  const char* params = CHAR(Rf_asChar(parameters));
  DatasetHandle new_dataset = nullptr;
  CHECK_CALL(LGBM_DatasetCreateFromFile(path, params, ref_dataset, &new_dataset));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(new_dataset, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

67
68
69
/*!
 * \brief Create a Dataset from CSC-format sparse data.
 * \param indptr Integer vector of column pointers
 * \param indices Integer vector of row indices
 * \param data Numeric vector of non-zero values
 * \param num_indptr Length of indptr
 * \param nelem Number of entries in data
 * \param num_row Number of rows
 * \param parameters Parameter string forwarded to LGBM_DatasetCreateFromCSC
 * \param reference External pointer to a reference Dataset, or R NULL for none
 * \return External pointer holding the new Dataset handle
 */
SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
  SEXP indices,
  SEXP data,
  SEXP num_indptr,
  SEXP nelem,
  SEXP num_row,
  SEXP parameters,
  SEXP reference) {
  R_API_BEGIN();
  const int* col_ptr = INTEGER(indptr);
  const int* row_idx = INTEGER(indices);
  const double* values = REAL(data);

  const int64_t col_ptr_len = static_cast<int64_t>(Rf_asInteger(num_indptr));
  const int64_t value_count = static_cast<int64_t>(Rf_asInteger(nelem));
  const int64_t row_count = static_cast<int64_t>(Rf_asInteger(num_row));

  // an R NULL reference is forwarded as a null handle
  DatasetHandle ref_dataset = Rf_isNull(reference) ? nullptr : R_ExternalPtrAddr(reference);
  const char* params = CHAR(Rf_asChar(parameters));
  DatasetHandle new_dataset = nullptr;
  CHECK_CALL(LGBM_DatasetCreateFromCSC(col_ptr, C_API_DTYPE_INT32, row_idx,
    values, C_API_DTYPE_FLOAT64, col_ptr_len, value_count,
    row_count, params, ref_dataset, &new_dataset));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(new_dataset, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

98
/*!
 * \brief Create a Dataset from a dense numeric matrix.
 * \param data Numeric matrix data; COL_MAJOR is passed as the layout flag
 * \param num_row Number of rows
 * \param num_col Number of columns
 * \param parameters Parameter string forwarded to LGBM_DatasetCreateFromMat
 * \param reference External pointer to a reference Dataset, or R NULL for none
 * \return External pointer holding the new Dataset handle
 */
SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
  SEXP num_row,
  SEXP num_col,
  SEXP parameters,
  SEXP reference) {
  R_API_BEGIN();
  const int32_t row_count = static_cast<int32_t>(Rf_asInteger(num_row));
  const int32_t col_count = static_cast<int32_t>(Rf_asInteger(num_col));
  double* matrix = REAL(data);
  // an R NULL reference is forwarded as a null handle
  DatasetHandle ref_dataset = Rf_isNull(reference) ? nullptr : R_ExternalPtrAddr(reference);
  const char* params = CHAR(Rf_asChar(parameters));
  DatasetHandle new_dataset = nullptr;
  CHECK_CALL(LGBM_DatasetCreateFromMat(matrix, C_API_DTYPE_FLOAT64, row_count, col_count, COL_MAJOR,
    params, ref_dataset, &new_dataset));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(new_dataset, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

121
/*!
 * \brief Create a Dataset containing a subset of the rows of an existing Dataset.
 * \param handle External pointer to the source Dataset
 * \param used_row_indices Integer vector of one-based row indices
 * \param len_used_row_indices Number of indices to read from used_row_indices
 * \param parameters Parameter string forwarded to LGBM_DatasetGetSubset
 * \return External pointer holding the new Dataset handle
 */
SEXP LGBM_DatasetGetSubset_R(SEXP handle,
  SEXP used_row_indices,
  SEXP len_used_row_indices,
  SEXP parameters) {
  SEXP ret;
  R_API_BEGIN();
  int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
  std::vector<int32_t> idxvec(len);
  // Resolve the R data pointer once on the calling thread, so the parallel
  // region only touches plain memory and never calls into the R API.
  // Skipped when len <= 0, matching the original which then never called INTEGER().
  const int* r_indices = (len > 0) ? INTEGER(used_row_indices) : nullptr;
  // convert from one-based to zero-based index
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
  for (int32_t i = 0; i < len; ++i) {
    idxvec[i] = static_cast<int32_t>(r_indices[i] - 1);
  }
  DatasetHandle res = nullptr;
  CHECK_CALL(LGBM_DatasetGetSubset(R_ExternalPtrAddr(handle),
    idxvec.data(), len, CHAR(Rf_asChar(parameters)),
    &res));
  ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return ret;
  R_API_END();
}

144
/*!
 * \brief Set the feature names of a Dataset.
 * \param handle External pointer to the Dataset
 * \param feature_names Single string holding the names separated by tab characters
 * \return R_NilValue
 */
SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
  SEXP feature_names) {
  R_API_BEGIN();
  const auto names = Split(CHAR(Rf_asChar(feature_names)), '\t');
  const int num_names = static_cast<int>(names.size());
  // table of C-string views into `names`, which outlives the call below
  std::vector<const char*> name_ptrs;
  for (int k = 0; k < num_names; ++k) {
    name_ptrs.push_back(names[k].c_str());
  }
  CHECK_CALL(LGBM_DatasetSetFeatureNames(R_ExternalPtrAddr(handle),
    name_ptrs.data(), num_names));
  R_API_END();
}

158
/*!
 * \brief Fetch the feature names of a Dataset as an R character vector.
 * \param handle External pointer to the Dataset
 * \return Character vector with one element per feature
 */
SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
  R_API_BEGIN();
  int num_features = 0;
  CHECK_CALL(LGBM_DatasetGetNumFeature(R_ExternalPtrAddr(handle), &num_features));
  const size_t initial_size = 256;
  std::vector<std::vector<char>> buffers(num_features);
  std::vector<char*> buffer_ptrs(num_features);
  // give every name buffer the requested size and refresh the pointer table
  auto size_buffers = [&buffers, &buffer_ptrs, num_features](size_t n) {
    for (int k = 0; k < num_features; ++k) {
      buffers[k].resize(n);
      buffer_ptrs[k] = buffers[k].data();
    }
  };
  size_buffers(initial_size);
  int num_written;
  size_t needed_size;
  CHECK_CALL(
    LGBM_DatasetGetFeatureNames(
      R_ExternalPtrAddr(handle),
      num_features, &num_written,
      initial_size, &needed_size,
      buffer_ptrs.data()));
  // if any feature name needed more room than was reserved,
  // grow all buffers to the reported size and fetch again
  if (needed_size > initial_size) {
    size_buffers(needed_size);
    CHECK_CALL(
      LGBM_DatasetGetFeatureNames(
        R_ExternalPtrAddr(handle),
        num_features,
        &num_written,
        needed_size,
        &needed_size,
        buffer_ptrs.data()));
  }
  CHECK_EQ(num_features, num_written);
  SEXP feature_names = PROTECT(Rf_allocVector(STRSXP, num_features));
  for (int k = 0; k < num_features; ++k) {
    SET_STRING_ELT(feature_names, k, Rf_mkChar(buffer_ptrs[k]));
  }
  UNPROTECT(1);
  return feature_names;
  R_API_END();
}

204
/*!
 * \brief Save a Dataset through LGBM_DatasetSaveBinary.
 * \param handle External pointer to the Dataset
 * \param filename Destination file name
 * \return R_NilValue
 */
SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
  SEXP filename) {
  R_API_BEGIN();
  DatasetHandle dataset = R_ExternalPtrAddr(handle);
  const char* path = CHAR(Rf_asChar(filename));
  CHECK_CALL(LGBM_DatasetSaveBinary(dataset, path));
  R_API_END();
}

212
/*!
 * \brief Free a Dataset and clear the external pointer that referred to it.
 * Does nothing when the external pointer already holds a null address.
 * \param handle External pointer to the Dataset
 * \return R_NilValue
 */
SEXP LGBM_DatasetFree_R(SEXP handle) {
  R_API_BEGIN();
  DatasetHandle dataset = R_ExternalPtrAddr(handle);
  if (dataset != nullptr) {
    CHECK_CALL(LGBM_DatasetFree(dataset));
    // clear the stored address so the handle cannot be freed twice
    R_ClearExternalPtr(handle);
  }
  R_API_END();
}

221
/*!
 * \brief Set a named field of a Dataset.
 * "group"/"query" data is read as integers and passed as int32;
 * "init_score" data is passed through as float64;
 * every other field is read as doubles and converted to float32.
 * \param handle External pointer to the Dataset
 * \param field_name Name of the field
 * \param field_data Integer vector (group/query) or numeric vector (all other fields)
 * \param num_element Number of elements to read from field_data
 * \return R_NilValue
 */
SEXP LGBM_DatasetSetField_R(SEXP handle,
  SEXP field_name,
  SEXP field_data,
  SEXP num_element) {
  R_API_BEGIN();
  int len = Rf_asInteger(num_element);
  const char* name = CHAR(Rf_asChar(field_name));
  // In each branch the R data pointer is resolved once on the calling thread so
  // that the OpenMP region never calls into the R API. The lookup is skipped for
  // len <= 0, matching the original which then never touched field_data.
  if (!strcmp("group", name) || !strcmp("query", name)) {
    std::vector<int32_t> vec(len);
    const int* src = (len > 0) ? INTEGER(field_data) : nullptr;
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
    for (int i = 0; i < len; ++i) {
      vec[i] = static_cast<int32_t>(src[i]);
    }
    CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.data(), len, C_API_DTYPE_INT32));
  } else if (!strcmp("init_score", name)) {
    CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, REAL(field_data), len, C_API_DTYPE_FLOAT64));
  } else {
    std::vector<float> vec(len);
    const double* src = (len > 0) ? REAL(field_data) : nullptr;
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
    for (int i = 0; i < len; ++i) {
      vec[i] = static_cast<float>(src[i]);
    }
    CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.data(), len, C_API_DTYPE_FLOAT32));
  }
  R_API_END();
}

248
/*!
 * \brief Copy a named field of a Dataset into a pre-allocated R vector.
 * For "group"/"query" the stored int32 boundaries are converted to sizes
 * (difference of neighbours), so one element fewer than the stored length is written.
 * "init_score" is read as double, every other field as float.
 * \param handle External pointer to the Dataset
 * \param field_name Name of the field
 * \param field_data Output vector: integer for group/query, numeric otherwise
 * \return R_NilValue
 */
SEXP LGBM_DatasetGetField_R(SEXP handle,
  SEXP field_name,
  SEXP field_data) {
  R_API_BEGIN();
  const char* name = CHAR(Rf_asChar(field_name));
  int out_len = 0;
  int out_type = 0;
  const void* res = nullptr;
  CHECK_CALL(LGBM_DatasetGetField(R_ExternalPtrAddr(handle), name, &out_len, &res, &out_type));

  // In each branch the destination pointer is resolved once on the calling thread
  // so that the OpenMP region never calls into the R API. The lookup is skipped
  // when nothing will be written, matching the original's behaviour.
  if (!strcmp("group", name) || !strcmp("query", name)) {
    auto p_data = reinterpret_cast<const int32_t*>(res);
    const int num_sizes = out_len - 1;
    int* dst = (num_sizes > 0) ? INTEGER(field_data) : nullptr;
    // convert from boundaries to size
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
    for (int i = 0; i < num_sizes; ++i) {
      dst[i] = p_data[i + 1] - p_data[i];
    }
  } else if (!strcmp("init_score", name)) {
    auto p_data = reinterpret_cast<const double*>(res);
    double* dst = (out_len > 0) ? REAL(field_data) : nullptr;
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
    for (int i = 0; i < out_len; ++i) {
      dst[i] = p_data[i];
    }
  } else {
    auto p_data = reinterpret_cast<const float*>(res);
    double* dst = (out_len > 0) ? REAL(field_data) : nullptr;
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
    for (int i = 0; i < out_len; ++i) {
      dst[i] = p_data[i];
    }
  }
  R_API_END();
}

281
/*!
 * \brief Write the number of elements of a named Dataset field into `out`.
 * For "group"/"query" the reported length is reduced by one.
 * \param handle External pointer to the Dataset
 * \param field_name Name of the field
 * \param out Integer vector whose first element receives the size
 * \return R_NilValue
 */
SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
  SEXP field_name,
  SEXP out) {
  R_API_BEGIN();
  const char* name = CHAR(Rf_asChar(field_name));
  int field_len = 0;
  int field_type = 0;
  const void* field_ptr;
  CHECK_CALL(LGBM_DatasetGetField(R_ExternalPtrAddr(handle), name, &field_len, &field_ptr, &field_type));
  const bool is_group_field = (strcmp("group", name) == 0) || (strcmp("query", name) == 0);
  INTEGER(out)[0] = is_group_field ? field_len - 1 : field_len;
  R_API_END();
}

297
298
/*!
 * \brief Forward two parameter strings to LGBM_DatasetUpdateParamChecking.
 * \param old_params Current parameter string
 * \param new_params Proposed parameter string
 * \return R_NilValue
 */
SEXP LGBM_DatasetUpdateParamChecking_R(SEXP old_params,
  SEXP new_params) {
  R_API_BEGIN();
  const char* current = CHAR(Rf_asChar(old_params));
  const char* proposed = CHAR(Rf_asChar(new_params));
  CHECK_CALL(LGBM_DatasetUpdateParamChecking(current, proposed));
  R_API_END();
}

304
/*!
 * \brief Write the value reported by LGBM_DatasetGetNumData into `out`.
 * \param handle External pointer to the Dataset
 * \param out Integer vector whose first element receives the count
 * \return R_NilValue
 */
SEXP LGBM_DatasetGetNumData_R(SEXP handle, SEXP out) {
  R_API_BEGIN();
  int num_data = 0;
  CHECK_CALL(LGBM_DatasetGetNumData(R_ExternalPtrAddr(handle), &num_data));
  int* out_slot = INTEGER(out);
  out_slot[0] = num_data;
  R_API_END();
}

312
/*!
 * \brief Write the value reported by LGBM_DatasetGetNumFeature into `out`.
 * \param handle External pointer to the Dataset
 * \param out Integer vector whose first element receives the count
 * \return R_NilValue
 */
SEXP LGBM_DatasetGetNumFeature_R(SEXP handle,
  SEXP out) {
  R_API_BEGIN();
  int num_features = 0;
  CHECK_CALL(LGBM_DatasetGetNumFeature(R_ExternalPtrAddr(handle), &num_features));
  int* out_slot = INTEGER(out);
  out_slot[0] = num_features;
  R_API_END();
}

// --- start Booster interfaces

323
/*!
 * \brief Free a Booster and clear the external pointer that referred to it.
 * Does nothing when the external pointer already holds a null address.
 * \param handle External pointer to the Booster
 * \return R_NilValue
 */
SEXP LGBM_BoosterFree_R(SEXP handle) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  if (booster != nullptr) {
    CHECK_CALL(LGBM_BoosterFree(booster));
    // clear the stored address so the handle cannot be freed twice
    R_ClearExternalPtr(handle);
  }
  R_API_END();
}

332
333
334
/*!
 * \brief Create a Booster for a training Dataset.
 * \param train_data External pointer to the training Dataset
 * \param parameters Parameter string forwarded to LGBM_BoosterCreate
 * \return External pointer holding the new Booster handle
 */
SEXP LGBM_BoosterCreate_R(SEXP train_data,
  SEXP parameters) {
  R_API_BEGIN();
  DatasetHandle dataset = R_ExternalPtrAddr(train_data);
  const char* params = CHAR(Rf_asChar(parameters));
  BoosterHandle booster = nullptr;
  CHECK_CALL(LGBM_BoosterCreate(dataset, params, &booster));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(booster, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

344
345
/*!
 * \brief Create a Booster from a model file.
 * The iteration count reported by the C API is received but not returned to R.
 * \param filename Path of the model file
 * \return External pointer holding the new Booster handle
 */
SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
  R_API_BEGIN();
  const char* model_path = CHAR(Rf_asChar(filename));
  int loaded_iterations = 0;
  BoosterHandle booster = nullptr;
  CHECK_CALL(LGBM_BoosterCreateFromModelfile(model_path, &loaded_iterations, &booster));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(booster, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

356
357
/*!
 * \brief Create a Booster from a model held in a string.
 * The iteration count reported by the C API is received but not returned to R.
 * \param model_str Model text
 * \return External pointer holding the new Booster handle
 */
SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
  R_API_BEGIN();
  const char* model_text = CHAR(Rf_asChar(model_str));
  int loaded_iterations = 0;
  BoosterHandle booster = nullptr;
  CHECK_CALL(LGBM_BoosterLoadModelFromString(model_text, &loaded_iterations, &booster));
  SEXP wrapped = PROTECT(R_MakeExternalPtr(booster, R_NilValue, R_NilValue));
  UNPROTECT(1);
  return wrapped;
  R_API_END();
}

368
369
/*!
 * \brief Forward two Booster handles to LGBM_BoosterMerge.
 * \param handle External pointer to the first Booster
 * \param other_handle External pointer to the second Booster
 * \return R_NilValue
 */
SEXP LGBM_BoosterMerge_R(SEXP handle,
  SEXP other_handle) {
  R_API_BEGIN();
  BoosterHandle target = R_ExternalPtrAddr(handle);
  BoosterHandle source = R_ExternalPtrAddr(other_handle);
  CHECK_CALL(LGBM_BoosterMerge(target, source));
  R_API_END();
}

375
376
/*!
 * \brief Forward a Booster and a Dataset handle to LGBM_BoosterAddValidData.
 * \param handle External pointer to the Booster
 * \param valid_data External pointer to the validation Dataset
 * \return R_NilValue
 */
SEXP LGBM_BoosterAddValidData_R(SEXP handle,
  SEXP valid_data) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  DatasetHandle dataset = R_ExternalPtrAddr(valid_data);
  CHECK_CALL(LGBM_BoosterAddValidData(booster, dataset));
  R_API_END();
}

382
383
/*!
 * \brief Forward a Booster and a Dataset handle to LGBM_BoosterResetTrainingData.
 * \param handle External pointer to the Booster
 * \param train_data External pointer to the new training Dataset
 * \return R_NilValue
 */
SEXP LGBM_BoosterResetTrainingData_R(SEXP handle,
  SEXP train_data) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  DatasetHandle dataset = R_ExternalPtrAddr(train_data);
  CHECK_CALL(LGBM_BoosterResetTrainingData(booster, dataset));
  R_API_END();
}

389
/*!
 * \brief Forward a parameter string to LGBM_BoosterResetParameter.
 * \param handle External pointer to the Booster
 * \param parameters New parameter string
 * \return R_NilValue
 */
SEXP LGBM_BoosterResetParameter_R(SEXP handle,
  SEXP parameters) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  const char* params = CHAR(Rf_asChar(parameters));
  CHECK_CALL(LGBM_BoosterResetParameter(booster, params));
  R_API_END();
}

396
/*!
 * \brief Write the value reported by LGBM_BoosterGetNumClasses into `out`.
 * \param handle External pointer to the Booster
 * \param out Integer vector whose first element receives the count
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetNumClasses_R(SEXP handle,
  SEXP out) {
  R_API_BEGIN();
  int class_count = 0;
  CHECK_CALL(LGBM_BoosterGetNumClasses(R_ExternalPtrAddr(handle), &class_count));
  int* out_slot = INTEGER(out);
  out_slot[0] = class_count;
  R_API_END();
}

405
/*!
 * \brief Call LGBM_BoosterUpdateOneIter on a Booster.
 * The "finished" flag reported by the C API is received but not returned to R.
 * \param handle External pointer to the Booster
 * \return R_NilValue
 */
SEXP LGBM_BoosterUpdateOneIter_R(SEXP handle) {
  R_API_BEGIN();
  int finished_flag = 0;
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  CHECK_CALL(LGBM_BoosterUpdateOneIter(booster, &finished_flag));
  R_API_END();
}

412
/*!
 * \brief Call LGBM_BoosterUpdateOneIterCustom with caller-supplied gradients and hessians.
 * Both inputs are read as doubles and converted to float before being passed on.
 * The "finished" flag reported by the C API is received but not returned to R.
 * \param handle External pointer to the Booster
 * \param grad Numeric vector of gradients
 * \param hess Numeric vector of hessians
 * \param len Number of elements to read from grad and hess
 * \return R_NilValue
 */
SEXP LGBM_BoosterUpdateOneIterCustom_R(SEXP handle,
  SEXP grad,
  SEXP hess,
  SEXP len) {
  int is_finished = 0;
  R_API_BEGIN();
  int int_len = Rf_asInteger(len);
  std::vector<float> tgrad(int_len), thess(int_len);
  // Resolve the R data pointers once on the calling thread so that the OpenMP
  // region never calls into the R API. Skipped for int_len <= 0, matching the
  // original which then never touched grad or hess.
  const double* grad_src = (int_len > 0) ? REAL(grad) : nullptr;
  const double* hess_src = (int_len > 0) ? REAL(hess) : nullptr;
#pragma omp parallel for schedule(static, 512) if (int_len >= 1024)
  for (int j = 0; j < int_len; ++j) {
    tgrad[j] = static_cast<float>(grad_src[j]);
    thess[j] = static_cast<float>(hess_src[j]);
  }
  CHECK_CALL(LGBM_BoosterUpdateOneIterCustom(R_ExternalPtrAddr(handle), tgrad.data(), thess.data(), &is_finished));
  R_API_END();
}

429
/*!
 * \brief Call LGBM_BoosterRollbackOneIter on a Booster.
 * \param handle External pointer to the Booster
 * \return R_NilValue
 */
SEXP LGBM_BoosterRollbackOneIter_R(SEXP handle) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  CHECK_CALL(LGBM_BoosterRollbackOneIter(booster));
  R_API_END();
}

435
/*!
 * \brief Write the value reported by LGBM_BoosterGetCurrentIteration into `out`.
 * \param handle External pointer to the Booster
 * \param out Integer vector whose first element receives the iteration
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle,
  SEXP out) {
  R_API_BEGIN();
  int current_iter = 0;
  CHECK_CALL(LGBM_BoosterGetCurrentIteration(R_ExternalPtrAddr(handle), &current_iter));
  int* out_slot = INTEGER(out);
  out_slot[0] = current_iter;
  R_API_END();
}

444
/*!
 * \brief Let LGBM_BoosterGetUpperBoundValue write its result into `out_result`.
 * \param handle External pointer to the Booster
 * \param out_result Numeric vector whose storage receives the value
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetUpperBoundValue_R(SEXP handle,
  SEXP out_result) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  CHECK_CALL(LGBM_BoosterGetUpperBoundValue(booster, REAL(out_result)));
  R_API_END();
}

452
/*!
 * \brief Let LGBM_BoosterGetLowerBoundValue write its result into `out_result`.
 * \param handle External pointer to the Booster
 * \param out_result Numeric vector whose storage receives the value
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetLowerBoundValue_R(SEXP handle,
  SEXP out_result) {
  R_API_BEGIN();
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  CHECK_CALL(LGBM_BoosterGetLowerBoundValue(booster, REAL(out_result)));
  R_API_END();
}

460
/*!
 * \brief Fetch the evaluation names of a Booster as an R character vector.
 * \param handle External pointer to the Booster
 * \return Character vector with one element per name counted by LGBM_BoosterGetEvalCounts
 */
SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
  R_API_BEGIN();
  int num_evals;
  CHECK_CALL(LGBM_BoosterGetEvalCounts(R_ExternalPtrAddr(handle), &num_evals));

  const size_t initial_size = 128;
  std::vector<std::vector<char>> buffers(num_evals);
  std::vector<char*> buffer_ptrs(num_evals);
  // give every name buffer the requested size and refresh the pointer table
  auto size_buffers = [&buffers, &buffer_ptrs, num_evals](size_t n) {
    for (int k = 0; k < num_evals; ++k) {
      buffers[k].resize(n);
      buffer_ptrs[k] = buffers[k].data();
    }
  };
  size_buffers(initial_size);

  int num_written;
  size_t needed_size;
  CHECK_CALL(
    LGBM_BoosterGetEvalNames(
      R_ExternalPtrAddr(handle),
      num_evals, &num_written,
      initial_size, &needed_size,
      buffer_ptrs.data()));
  // if any eval name needed more room than was reserved,
  // grow all buffers to the reported size and fetch again
  if (needed_size > initial_size) {
    size_buffers(needed_size);
    CHECK_CALL(
      LGBM_BoosterGetEvalNames(
        R_ExternalPtrAddr(handle),
        num_evals,
        &num_written,
        needed_size,
        &needed_size,
        buffer_ptrs.data()));
  }
  CHECK_EQ(num_written, num_evals);
  SEXP eval_names = PROTECT(Rf_allocVector(STRSXP, num_evals));
  for (int k = 0; k < num_evals; ++k) {
    SET_STRING_ELT(eval_names, k, Rf_mkChar(buffer_ptrs[k]));
  }
  UNPROTECT(1);
  return eval_names;
  R_API_END();
}

508
/*!
 * \brief Let LGBM_BoosterGetEval write evaluation results into `out_result`.
 * The number of results written is checked against LGBM_BoosterGetEvalCounts.
 * \param handle External pointer to the Booster
 * \param data_idx Index of the dataset to evaluate, forwarded as an int
 * \param out_result Numeric vector whose storage receives the results
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetEval_R(SEXP handle,
  SEXP data_idx,
  SEXP out_result) {
  R_API_BEGIN();
  int expected_count;
  CHECK_CALL(LGBM_BoosterGetEvalCounts(R_ExternalPtrAddr(handle), &expected_count));
  double* results = REAL(out_result);
  int written_count;
  CHECK_CALL(LGBM_BoosterGetEval(R_ExternalPtrAddr(handle), Rf_asInteger(data_idx), &written_count, results));
  CHECK_EQ(written_count, expected_count);
  R_API_END();
}

521
/*!
 * \brief Write the value reported by LGBM_BoosterGetNumPredict into `out`.
 * The 64-bit count is narrowed to int before being stored.
 * \param handle External pointer to the Booster
 * \param data_idx Dataset index, forwarded as an int
 * \param out Integer vector whose first element receives the count
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetNumPredict_R(SEXP handle,
  SEXP data_idx,
  SEXP out) {
  R_API_BEGIN();
  int64_t prediction_count;
  const int dataset_index = Rf_asInteger(data_idx);
  CHECK_CALL(LGBM_BoosterGetNumPredict(R_ExternalPtrAddr(handle), dataset_index, &prediction_count));
  int* out_slot = INTEGER(out);
  out_slot[0] = static_cast<int>(prediction_count);
  R_API_END();
}

531
/*!
 * \brief Let LGBM_BoosterGetPredict write predictions into `out_result`.
 * The length reported by the C API is received but not returned to R.
 * \param handle External pointer to the Booster
 * \param data_idx Dataset index, forwarded as an int
 * \param out_result Numeric vector whose storage receives the predictions
 * \return R_NilValue
 */
SEXP LGBM_BoosterGetPredict_R(SEXP handle,
  SEXP data_idx,
  SEXP out_result) {
  R_API_BEGIN();
  double* predictions = REAL(out_result);
  int64_t written_count;
  const int dataset_index = Rf_asInteger(data_idx);
  CHECK_CALL(LGBM_BoosterGetPredict(R_ExternalPtrAddr(handle), dataset_index, &written_count, predictions));
  R_API_END();
}

541
/*!
 * \brief Map three flags onto a C_API_PREDICT_* constant.
 * When several flags are non-zero the precedence is:
 * is_predcontrib, then is_leafidx, then is_rawscore.
 * \param is_rawscore Non-zero selects C_API_PREDICT_RAW_SCORE
 * \param is_leafidx Non-zero selects C_API_PREDICT_LEAF_INDEX
 * \param is_predcontrib Non-zero selects C_API_PREDICT_CONTRIB
 * \return The selected constant, or C_API_PREDICT_NORMAL when all flags are zero
 */
int GetPredictType(SEXP is_rawscore, SEXP is_leafidx, SEXP is_predcontrib) {
  // all three flags are always converted, in this order, as in a chain of plain ifs
  const bool want_raw = Rf_asInteger(is_rawscore) != 0;
  const bool want_leaf = Rf_asInteger(is_leafidx) != 0;
  const bool want_contrib = Rf_asInteger(is_predcontrib) != 0;
  if (want_contrib) {
    return C_API_PREDICT_CONTRIB;
  }
  if (want_leaf) {
    return C_API_PREDICT_LEAF_INDEX;
  }
  if (want_raw) {
    return C_API_PREDICT_RAW_SCORE;
  }
  return C_API_PREDICT_NORMAL;
}

555
/*!
 * \brief Forward a file-based prediction request to LGBM_BoosterPredictForFile.
 * \param handle External pointer to the Booster
 * \param data_filename Name of the input data file
 * \param data_has_header Forwarded as an int
 * \param is_rawscore Flag passed to GetPredictType
 * \param is_leafidx Flag passed to GetPredictType
 * \param is_predcontrib Flag passed to GetPredictType
 * \param start_iteration Forwarded as an int
 * \param num_iteration Forwarded as an int
 * \param parameter Parameter string
 * \param result_filename Name of the result file
 * \return R_NilValue
 */
SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
  SEXP data_filename,
  SEXP data_has_header,
  SEXP is_rawscore,
  SEXP is_leafidx,
  SEXP is_predcontrib,
  SEXP start_iteration,
  SEXP num_iteration,
  SEXP parameter,
  SEXP result_filename) {
  R_API_BEGIN();
  const int predict_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
  BoosterHandle booster = R_ExternalPtrAddr(handle);
  CHECK_CALL(LGBM_BoosterPredictForFile(
    booster,
    CHAR(Rf_asChar(data_filename)),
    Rf_asInteger(data_has_header),
    predict_type,
    Rf_asInteger(start_iteration),
    Rf_asInteger(num_iteration),
    CHAR(Rf_asChar(parameter)),
    CHAR(Rf_asChar(result_filename))));
  R_API_END();
}

573
/*!
 * \brief Write the value reported by LGBM_BoosterCalcNumPredict into `out_len`.
 * The 64-bit count is narrowed to int before being stored.
 * \param handle External pointer to the Booster
 * \param num_row Number of rows, forwarded as an int
 * \param is_rawscore Flag passed to GetPredictType
 * \param is_leafidx Flag passed to GetPredictType
 * \param is_predcontrib Flag passed to GetPredictType
 * \param start_iteration Forwarded as an int
 * \param num_iteration Forwarded as an int
 * \param out_len Integer vector whose first element receives the count
 * \return R_NilValue
 */
SEXP LGBM_BoosterCalcNumPredict_R(SEXP handle,
  SEXP num_row,
  SEXP is_rawscore,
  SEXP is_leafidx,
  SEXP is_predcontrib,
  SEXP start_iteration,
  SEXP num_iteration,
  SEXP out_len) {
  R_API_BEGIN();
  const int predict_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
  int64_t prediction_count = 0;
  CHECK_CALL(LGBM_BoosterCalcNumPredict(
    R_ExternalPtrAddr(handle),
    Rf_asInteger(num_row),
    predict_type,
    Rf_asInteger(start_iteration),
    Rf_asInteger(num_iteration),
    &prediction_count));
  int* out_slot = INTEGER(out_len);
  out_slot[0] = static_cast<int>(prediction_count);
  R_API_END();
}

590
/*!
 * \brief Predict for CSC-format sparse data, writing the results into `out_result`.
 * The length reported by the C API is received but not returned to R.
 * \param handle External pointer to the Booster
 * \param indptr Integer vector of column pointers
 * \param indices Integer vector of row indices
 * \param data Numeric vector of non-zero values
 * \param num_indptr Length of indptr
 * \param nelem Number of entries in data
 * \param num_row Number of rows
 * \param is_rawscore Flag passed to GetPredictType
 * \param is_leafidx Flag passed to GetPredictType
 * \param is_predcontrib Flag passed to GetPredictType
 * \param start_iteration Forwarded as an int
 * \param num_iteration Forwarded as an int
 * \param parameter Parameter string
 * \param out_result Numeric vector whose storage receives the predictions
 * \return R_NilValue
 */
SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
  SEXP indptr,
  SEXP indices,
  SEXP data,
  SEXP num_indptr,
  SEXP nelem,
  SEXP num_row,
  SEXP is_rawscore,
  SEXP is_leafidx,
  SEXP is_predcontrib,
  SEXP start_iteration,
  SEXP num_iteration,
  SEXP parameter,
  SEXP out_result) {
  R_API_BEGIN();
  const int predict_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);

  const int* col_ptr = INTEGER(indptr);
  const int32_t* row_idx = reinterpret_cast<const int32_t*>(INTEGER(indices));
  const double* values = REAL(data);

  const int64_t col_ptr_len = static_cast<int64_t>(Rf_asInteger(num_indptr));
  const int64_t value_count = static_cast<int64_t>(Rf_asInteger(nelem));
  const int64_t row_count = static_cast<int64_t>(Rf_asInteger(num_row));
  double* predictions = REAL(out_result);
  int64_t written_count;
  CHECK_CALL(LGBM_BoosterPredictForCSC(
    R_ExternalPtrAddr(handle),
    col_ptr, C_API_DTYPE_INT32, row_idx,
    values, C_API_DTYPE_FLOAT64, col_ptr_len, value_count,
    row_count, predict_type,
    Rf_asInteger(start_iteration),
    Rf_asInteger(num_iteration),
    CHAR(Rf_asChar(parameter)),
    &written_count, predictions));
  R_API_END();
}

623
/*!
 * \brief Predict for a dense numeric matrix, writing the results into `out_result`.
 * The length reported by the C API is received but not returned to R.
 * \param handle External pointer to the Booster
 * \param data Numeric matrix data; COL_MAJOR is passed as the layout flag
 * \param num_row Number of rows
 * \param num_col Number of columns
 * \param is_rawscore Flag passed to GetPredictType
 * \param is_leafidx Flag passed to GetPredictType
 * \param is_predcontrib Flag passed to GetPredictType
 * \param start_iteration Forwarded as an int
 * \param num_iteration Forwarded as an int
 * \param parameter Parameter string
 * \param out_result Numeric vector whose storage receives the predictions
 * \return R_NilValue
 */
SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
  SEXP data,
  SEXP num_row,
  SEXP num_col,
  SEXP is_rawscore,
  SEXP is_leafidx,
  SEXP is_predcontrib,
  SEXP start_iteration,
  SEXP num_iteration,
  SEXP parameter,
  SEXP out_result) {
  R_API_BEGIN();
  const int predict_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);

  const int32_t row_count = static_cast<int32_t>(Rf_asInteger(num_row));
  const int32_t col_count = static_cast<int32_t>(Rf_asInteger(num_col));

  const double* matrix = REAL(data);
  double* predictions = REAL(out_result);
  int64_t written_count;
  CHECK_CALL(LGBM_BoosterPredictForMat(
    R_ExternalPtrAddr(handle),
    matrix, C_API_DTYPE_FLOAT64, row_count, col_count, COL_MAJOR,
    predict_type,
    Rf_asInteger(start_iteration),
    Rf_asInteger(num_iteration),
    CHAR(Rf_asChar(parameter)),
    &written_count, predictions));

  R_API_END();
}

650
/*!
 * \brief Save a Booster through LGBM_BoosterSaveModel, with a start iteration of 0.
 * \param handle External pointer to the Booster
 * \param num_iteration Forwarded as an int
 * \param feature_importance_type Forwarded as an int
 * \param filename Destination file name
 * \return R_NilValue
 */
SEXP LGBM_BoosterSaveModel_R(SEXP handle,
  SEXP num_iteration,
  SEXP feature_importance_type,
  SEXP filename) {
  R_API_BEGIN();
  CHECK_CALL(LGBM_BoosterSaveModel(
    R_ExternalPtrAddr(handle),
    0,
    Rf_asInteger(num_iteration),
    Rf_asInteger(feature_importance_type),
    CHAR(Rf_asChar(filename))));
  R_API_END();
}

659
/*!
 * \brief Serialise a Booster with LGBM_BoosterSaveModelToString and return the text.
 * A 1 MiB buffer is tried first; when the C API reports a larger required
 * length the buffer is grown to that length and the call is repeated.
 * \param handle External pointer to the Booster
 * \param num_iteration Forwarded as an int
 * \param feature_importance_type Forwarded as an int
 * \return Length-one character vector holding the model text
 */
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
  SEXP num_iteration,
  SEXP feature_importance_type) {
  R_API_BEGIN();
  int64_t needed = 0;
  const int64_t capacity = 1024 * 1024;
  const int num_iter = Rf_asInteger(num_iteration);
  const int importance_type = Rf_asInteger(feature_importance_type);
  std::vector<char> text(capacity);
  CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, capacity, &needed, text.data()));
  // the first attempt did not fit: retry with the size the C API asked for
  if (needed > capacity) {
    text.resize(needed);
    CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, needed, &needed, text.data()));
  }
  SEXP model_str = PROTECT(Rf_allocVector(STRSXP, 1));
  SET_STRING_ELT(model_str, 0, Rf_mkChar(text.data()));
  UNPROTECT(1);
  return model_str;
  R_API_END();
}

682
/*!
 * \brief Dump a Booster with LGBM_BoosterDumpModel and return the text.
 * A 1 MiB buffer is tried first; when the C API reports a larger required
 * length the buffer is grown to that length and the call is repeated.
 * \param handle External pointer to the Booster
 * \param num_iteration Forwarded as an int
 * \param feature_importance_type Forwarded as an int
 * \return Length-one character vector holding the dumped model text
 */
SEXP LGBM_BoosterDumpModel_R(SEXP handle,
  SEXP num_iteration,
  SEXP feature_importance_type) {
  R_API_BEGIN();
  int64_t needed = 0;
  const int64_t capacity = 1024 * 1024;
  const int num_iter = Rf_asInteger(num_iteration);
  const int importance_type = Rf_asInteger(feature_importance_type);
  std::vector<char> text(capacity);
  CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, capacity, &needed, text.data()));
  // the first attempt did not fit: retry with the size the C API asked for
  if (needed > capacity) {
    text.resize(needed);
    CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, needed, &needed, text.data()));
  }
  SEXP model_str = PROTECT(Rf_allocVector(STRSXP, 1));
  SET_STRING_ELT(model_str, 0, Rf_mkChar(text.data()));
  UNPROTECT(1);
  return model_str;
  R_API_END();
}
704
705
706

// .Call() calls
// Builds one registration row: the routine's name as a string, its address,
// and the number of SEXP arguments it takes.
#define LGBM_R_CALLDEF(fn, nargs) {#fn, (DL_FUNC) &fn, nargs}

// Table of every routine callable through .Call(), terminated by a NULL row.
// Each argument count must match the parameter list of the corresponding definition.
static const R_CallMethodDef CallEntries[] = {
  LGBM_R_CALLDEF(LGBM_HandleIsNull_R, 1),
  LGBM_R_CALLDEF(LGBM_DatasetCreateFromFile_R, 3),
  LGBM_R_CALLDEF(LGBM_DatasetCreateFromCSC_R, 8),
  LGBM_R_CALLDEF(LGBM_DatasetCreateFromMat_R, 5),
  LGBM_R_CALLDEF(LGBM_DatasetGetSubset_R, 4),
  LGBM_R_CALLDEF(LGBM_DatasetSetFeatureNames_R, 2),
  LGBM_R_CALLDEF(LGBM_DatasetGetFeatureNames_R, 1),
  LGBM_R_CALLDEF(LGBM_DatasetSaveBinary_R, 2),
  LGBM_R_CALLDEF(LGBM_DatasetFree_R, 1),
  LGBM_R_CALLDEF(LGBM_DatasetSetField_R, 4),
  LGBM_R_CALLDEF(LGBM_DatasetGetFieldSize_R, 3),
  LGBM_R_CALLDEF(LGBM_DatasetGetField_R, 3),
  LGBM_R_CALLDEF(LGBM_DatasetUpdateParamChecking_R, 2),
  LGBM_R_CALLDEF(LGBM_DatasetGetNumData_R, 2),
  LGBM_R_CALLDEF(LGBM_DatasetGetNumFeature_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterCreate_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterFree_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterCreateFromModelfile_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterLoadModelFromString_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterMerge_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterAddValidData_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterResetTrainingData_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterResetParameter_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterGetNumClasses_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterUpdateOneIter_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterUpdateOneIterCustom_R, 4),
  LGBM_R_CALLDEF(LGBM_BoosterRollbackOneIter_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterGetCurrentIteration_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterGetUpperBoundValue_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterGetLowerBoundValue_R, 2),
  LGBM_R_CALLDEF(LGBM_BoosterGetEvalNames_R, 1),
  LGBM_R_CALLDEF(LGBM_BoosterGetEval_R, 3),
  LGBM_R_CALLDEF(LGBM_BoosterGetNumPredict_R, 3),
  LGBM_R_CALLDEF(LGBM_BoosterGetPredict_R, 3),
  LGBM_R_CALLDEF(LGBM_BoosterPredictForFile_R, 10),
  LGBM_R_CALLDEF(LGBM_BoosterCalcNumPredict_R, 8),
  LGBM_R_CALLDEF(LGBM_BoosterPredictForCSC_R, 14),
  LGBM_R_CALLDEF(LGBM_BoosterPredictForMat_R, 11),
  LGBM_R_CALLDEF(LGBM_BoosterSaveModel_R, 4),
  LGBM_R_CALLDEF(LGBM_BoosterSaveModelToString_R, 3),
  LGBM_R_CALLDEF(LGBM_BoosterDumpModel_R, 3),
  {NULL, NULL, 0}
};

#undef LGBM_R_CALLDEF

751
752
LIGHTGBM_C_EXPORT void R_init_lightgbm(DllInfo *dll);

753
754
755
756
void R_init_lightgbm(DllInfo *dll) {
  R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
  R_useDynamicSymbols(dll, FALSE);
}