#' @importFrom methods is new
#' @importFrom R6 R6Class
#' @importFrom utils read.delim
#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix CsparseMatrix RsparseMatrix
Predictor <- R6::R6Class(

  # Internal R6 class that holds a native booster handle and produces
  # predictions from a file, a dense matrix, or a sparse object from the
  # Matrix package ("dsparseVector", "dgRMatrix", "dgCMatrix").
  classname = "lgb.Predictor",
  cloneable = FALSE,
  public = list(

    # Free the native booster handle, but only when this object created it
    # (i.e. it was loaded from a model file in initialize()).
    # Returns NULL invisibly.
    finalize = function() {

      if (private$need_free_handle) {

        .Call(
          LGBM_BoosterFree_R
          , private$handle
        )
        private$handle <- NULL

        # clear the flag so a repeated call cannot hand a NULL handle
        # to the native free routine
        private$need_free_handle <- FALSE

      }

      return(invisible(NULL))

    },

    # Build a predictor.
    #
    # modelfile: one of
    #   * a character path to a model file (a new handle is created and is
    #     owned, hence later freed, by this object)
    #   * an "lgb.Booster.handle" / external pointer (borrowed, never freed here)
    #   * an lgb.Booster, whose handle is borrowed through get_handle()
    # params: list of prediction parameters, serialized once with
    #   lgb.params2str() and passed to every native prediction call
    # fast_predict_config: list describing a pre-built single-row prediction
    #   configuration; an empty list disables the fast path
    #
    # Stops if 'modelfile' is none of the supported types.
    # Returns NULL invisibly.
    initialize = function(modelfile, params = list(), fast_predict_config = list()) {
      private$params <- lgb.params2str(params = params)
      handle <- NULL

      if (is.character(modelfile)) {

        # Load the model from disk; this object owns the resulting handle
        handle <- .Call(
          LGBM_BoosterCreateFromModelfile_R
          , path.expand(modelfile)
        )
        private$need_free_handle <- TRUE

      } else if (methods::is(modelfile, "lgb.Booster.handle") || inherits(modelfile, "externalptr")) {

        # Already a handle: borrow it, the caller remains its owner
        handle <- modelfile
        private$need_free_handle <- FALSE

      } else if (lgb.is.Booster(modelfile)) {

        # Borrow the handle held by the booster object
        handle <- modelfile$get_handle()
        private$need_free_handle <- FALSE

      } else {

        stop("lgb.Predictor: modelfile must be either a character filename or an lgb.Booster.handle")

      }

      private$fast_predict_config <- fast_predict_config

      # Override class and store it
      class(handle) <- "lgb.Booster.handle"
      private$handle <- handle

      return(invisible(NULL))

    },

    # Get the current iteration of the underlying booster.
    current_iter = function() {

      # NOTE(review): the return value of .Call() is ignored, so the native
      # routine is presumably expected to write into 'cur_iter' in place
      cur_iter <- 0L
      .Call(
        LGBM_BoosterGetCurrentIteration_R
        , private$handle
        , cur_iter
      )
      return(cur_iter)

    },

    # Predict from data.
    #
    # data: a length-1 character path to a data file, a base R matrix, or a
    #   "dsparseVector" / "dgRMatrix" / "dgCMatrix" object
    # start_iteration: NULL is replaced by 0L
    # num_iteration: NULL is replaced by -1L
    # rawscore, predleaf, predcontrib: prediction-type flags forwarded to the
    #   native prediction routines
    # header: forwarded to the native file predictor; only used when 'data'
    #   is a file path
    #
    # Returns a numeric vector, or a matrix with one row per observation when
    # there is more than one prediction per row or when 'predleaf' /
    # 'predcontrib' is set. With 'predcontrib' on a sparse input, the result
    # is a sparse object of the same class as the input.
    # Stops on unsupported input classes, on inputs with an incompatible
    # number of columns, and when the number of predictions is not a
    # multiple of the number of rows.
    predict = function(data,
                       start_iteration = NULL,
                       num_iteration = NULL,
                       rawscore = FALSE,
                       predleaf = FALSE,
                       predcontrib = FALSE,
                       header = FALSE) {

      # Check if number of iterations is existing - if not, then set it to -1 (use all)
      if (is.null(num_iteration)) {
        num_iteration <- -1L
      }
      # Check if start iterations is existing - if not, then set it to 0 (start from the first iteration)
      if (is.null(start_iteration)) {
        start_iteration <- 0L
      }

      num_row <- 0L

      # Check if data is a file name and not a matrix
      if (identical(class(data), "character") && length(data) == 1L) {

        data <- path.expand(data)

        # The native routine writes predictions to a temporary file with a
        # "lightgbm_" pattern in its name; it is removed when predict() exits
        tmp_filename <- tempfile(pattern = "lightgbm_")
        on.exit(unlink(tmp_filename), add = TRUE)

        # Predict from the data file into the temporary file
        .Call(
          LGBM_BoosterPredictForFile_R
          , private$handle
          , data
          , as.integer(header)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , as.integer(start_iteration)
          , as.integer(num_iteration)
          , private$params
          , tmp_filename
        )

        # Read predictions back and flatten them row by row
        preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
        num_row <- nrow(preds)
        preds <- as.vector(t(preds))

      } else if (predcontrib && inherits(data, c("dsparseMatrix", "dsparseVector"))) {

        # Feature contributions on sparse inputs are returned as sparse objects

        ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
        num_classes <- integer(1L)
        .Call(LGBM_BoosterGetNumClasses_R, private$handle, num_classes)

        # Width of the output: (number of features + 1) per class. The product
        # is computed in double precision because integer multiplication
        # overflows to NA, which would break the size check below.
        ncols_out <- (as.numeric(ncols) + 1.0) * as.numeric(max(num_classes, 1L))
        if (ncols_out <= .Machine$integer.max) {
          ncols_out <- as.integer(ncols_out)
        } else if (!inherits(data, "dsparseVector")) {
          # matrix dimensions must be integers; only the sparse-vector result
          # can carry a larger length
          stop("Resulting matrix of feature contributions is too large for R to handle.")
        }

        if (inherits(data, "dsparseVector")) {

          if (length(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , length(data)))
          }
          # A sparse vector is passed as a one-row CSR matrix; its indices
          # are 1-based in R and shifted to 0-based here
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , c(0L, as.integer(length(data@x)))
            , data@i - 1L
            , data@x
            , TRUE
            , 1L
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dsparseVector")
          out@i <- res$indices + 1L
          out@x <- res$data
          out@length <- ncols_out
          return(out)

        } else if (inherits(data, "dgRMatrix")) {

          if (ncol(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@j
            , data@x
            , TRUE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgRMatrix")
          out@p <- res$indptr
          out@j <- res$indices
          out@x <- res$data
          out@Dim <- as.integer(c(nrow(data), ncols_out))

        } else if (inherits(data, "dgCMatrix")) {

          # unlike the row-oriented inputs, the column count must match exactly
          if (ncol(data) != ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , FALSE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgCMatrix")
          out@p <- res$indptr
          out@i <- res$indices
          out@x <- res$data
          # number of output columns is taken from the returned column pointer
          out@Dim <- as.integer(c(nrow(data), length(res$indptr) - 1L))

        } else {

          stop(sprintf("Predictions on sparse inputs are only allowed for '%s', '%s', '%s' - got: %s"
                       , "dsparseVector"
                       , "dgRMatrix"
                       , "dgCMatrix"
                       , toString(class(data))))
        }

        # Keep row names if the input has any
        if (NROW(row.names(data))) {
          out@Dimnames[[1L]] <- row.names(data)
        }
        return(out)

      } else {

        # Not a file, we need to predict from R object
        num_row <- nrow(data)
        if (is.null(num_row)) {
          # objects without a row dimension (e.g. sparse vectors) are one row
          num_row <- 1L
        }

        npred <- 0L

        # Check number of predictions to do
        # NOTE(review): the return value of .Call() is ignored, so the native
        # routine is presumably expected to write into 'npred' in place
        .Call(
          LGBM_BoosterCalcNumPredict_R
          , private$handle
          , as.integer(num_row)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , as.integer(start_iteration)
          , as.integer(num_iteration)
          , npred
        )

        # Pre-allocate the output vector passed to the native routines below
        preds <- numeric(npred)

        # Check if data is a matrix
        if (is.matrix(data)) {
          # this if() prevents the memory and computational costs
          # of converting something that is already "double" to "double"
          if (storage.mode(data) != "double") {
            storage.mode(data) <- "double"
          }

          if (nrow(data) == 1L) {

            use_fast_config <- private$check_can_use_fast_predict_config(
              csr = FALSE
              , rawscore = rawscore
              , predleaf = predleaf
              , predcontrib = predcontrib
              , start_iteration = start_iteration
              , num_iteration = num_iteration
            )

            if (use_fast_config) {
              .Call(
                LGBM_BoosterPredictForMatSingleRowFast_R
                , private$fast_predict_config$handle
                , data
                , preds
              )
            } else {
              .Call(
                LGBM_BoosterPredictForMatSingleRow_R
                , private$handle
                , data
                , rawscore
                , predleaf
                , predcontrib
                , start_iteration
                , num_iteration
                , private$params
                , preds
              )
            }

          } else {
            .Call(
              LGBM_BoosterPredictForMat_R
              , private$handle
              , data
              , as.integer(nrow(data))
              , as.integer(ncol(data))
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , as.integer(start_iteration)
              , as.integer(num_iteration)
              , private$params
              , preds
            )
          }

        } else if (inherits(data, "dsparseVector")) {

          # The fast-predict configuration is a private field; when it does
          # not supply a column count, ask the model for it.
          ncols <- private$fast_predict_config$ncols
          if (is.null(ncols)) {
            ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
          }
          # returns FALSE when no configuration is stored
          use_fast_config <- private$check_can_use_fast_predict_config(
            csr = TRUE
            , rawscore = rawscore
            , predleaf = predleaf
            , predcontrib = predcontrib
            , start_iteration = start_iteration
            , num_iteration = num_iteration
          )

          if (length(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , length(data)))
          }

          # sparse vector indices are 1-based in R and shifted to 0-based here
          if (use_fast_config) {
            .Call(
              LGBM_BoosterPredictForCSRSingleRowFast_R
              , private$fast_predict_config$handle
              , data@i - 1L
              , data@x
              , preds
            )
          } else {
            .Call(
              LGBM_BoosterPredictForCSRSingleRow_R
              , private$handle
              , data@i - 1L
              , data@x
              , ncols
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , start_iteration
              , num_iteration
              , private$params
              , preds
            )
          }

        } else if (inherits(data, "dgRMatrix")) {

          ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
          if (ncol(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }

          if (nrow(data) == 1L) {

            # Prefer the column count stored in the (private) fast-predict
            # configuration when there is one
            if (!is.null(private$fast_predict_config$ncols)) {
              ncols <- private$fast_predict_config$ncols
            }
            # returns FALSE when no configuration is stored
            use_fast_config <- private$check_can_use_fast_predict_config(
              csr = TRUE
              , rawscore = rawscore
              , predleaf = predleaf
              , predcontrib = predcontrib
              , start_iteration = start_iteration
              , num_iteration = num_iteration
            )

            if (use_fast_config) {
              .Call(
                LGBM_BoosterPredictForCSRSingleRowFast_R
                , private$fast_predict_config$handle
                , data@j
                , data@x
                , preds
              )
            } else {
              .Call(
                LGBM_BoosterPredictForCSRSingleRow_R
                , private$handle
                , data@j
                , data@x
                , ncols
                , as.integer(rawscore)
                , as.integer(predleaf)
                , as.integer(predcontrib)
                , start_iteration
                , num_iteration
                , private$params
                , preds
              )
            }

          } else {

            .Call(
              LGBM_BoosterPredictForCSR_R
              , private$handle
              , data@p
              , data@j
              , data@x
              , ncols
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , start_iteration
              , num_iteration
              , private$params
              , preds
            )

          }

        } else if (methods::is(data, "dgCMatrix")) {
          if (length(data@p) > 2147483647L) {
            stop("Cannot support large CSC matrix")
          }
          # data is a dgCMatrix (sparse matrix, column compressed format)
          .Call(
            LGBM_BoosterPredictForCSC_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , length(data@p)
            , length(data@x)
            , nrow(data)
            , as.integer(rawscore)
            , as.integer(predleaf)
            , as.integer(predcontrib)
            , as.integer(start_iteration)
            , as.integer(num_iteration)
            , private$params
            , preds
          )

        } else {

          # toString() keeps the message readable for objects with several
          # classes; single-class output is unchanged
          stop("predict: cannot predict on data of class ", toString(sQuote(class(data))))

        }
      }

      # Check if number of rows is strange (not a multiple of the dataset rows)
      if (length(preds) %% num_row != 0L) {
        stop(
          "predict: prediction length "
          , sQuote(length(preds))
          , " is not a multiple of nrows(data): "
          , sQuote(num_row)
        )
      }

      # Get number of predictions per row
      npred_per_case <- length(preds) / num_row

      # Reshape to one row per observation when there are several values per
      # row, or whenever leaf indices / contributions were requested
      if (npred_per_case > 1L || predleaf || predcontrib) {
        preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
      }

      # Keep row names if possible
      if (NROW(row.names(data)) && NROW(data) == NROW(preds)) {
        if (is.null(dim(preds))) {
          names(preds) <- row.names(data)
        } else {
          row.names(preds) <- row.names(data)
        }
      }

      return(preds)
    }

  ),
  private = list(
    # native booster handle (class "lgb.Booster.handle" once initialized)
    handle = NULL
    # TRUE only when the handle was created by this object from a model file
    , need_free_handle = FALSE
    # prediction parameters serialized by lgb.params2str()
    , params = ""
    # configuration for single-row fast prediction; empty list when unused
    , fast_predict_config = list()

    # Decide whether the stored fast-predict configuration can serve a
    # single-row prediction made with the given arguments.
    #
    # csr: TRUE when the input is in sparse row format, FALSE for dense input
    # the remaining arguments are the values passed to predict()
    #
    # Returns FALSE when no configuration is stored, when its handle is null
    # (a warning is raised in that case), when it was built for the other
    # input layout, when extra prediction parameters are set, or when any
    # prediction option differs from the stored one; TRUE otherwise.
    , check_can_use_fast_predict_config = function(csr,
                                                   rawscore,
                                                   predleaf,
                                                   predcontrib,
                                                   start_iteration,
                                                   num_iteration) {

      if (!NROW(private$fast_predict_config)) {
        return(FALSE)
      }

      if (lgb.is.null.handle(private$fast_predict_config$handle)) {
        warning(paste0("Model had fast CSR predict configuration, but it is inactive."
                       , " Try re-generating it through 'lgb.configure_fast_predict'."))
        return(FALSE)
      }

      if (isTRUE(csr) != private$fast_predict_config$csr) {
        return(FALSE)
      }

      return(
        private$params == "" &&
        private$fast_predict_config$rawscore == rawscore &&
        private$fast_predict_config$predleaf == predleaf &&
        private$fast_predict_config$predcontrib == predcontrib &&
        lgb.equal.or.both.null(private$fast_predict_config$start_iteration, start_iteration) &&
        lgb.equal.or.both.null(private$fast_predict_config$num_iteration, num_iteration)
      )
    }
  )
)