#' @importFrom methods is new
#' @importFrom R6 R6Class
#' @importFrom utils read.delim
#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix CsparseMatrix RsparseMatrix

# Internal R6 class wrapping a booster handle and exposing prediction on
# files, dense matrices and sparse inputs ('dsparseVector', 'dgRMatrix',
# 'dgCMatrix').
#
# Public methods:
#   * initialize(modelfile, params, fast_predict_config)
#   * finalize()
#   * current_iter()
#   * predict(data, start_iteration, num_iteration, rawscore, predleaf,
#             predcontrib, header)
Predictor <- R6::R6Class(

  classname = "lgb.Predictor",
  cloneable = FALSE,
  public = list(

    # Finalize will free up the handle, but only when this object created it
    # (i.e. when it was built from a model file).
    finalize = function() {

      # Check the need for freeing handle
      if (private$need_free_handle) {

        .Call(
          LGBM_BoosterFree_R
          , private$handle
        )
        private$handle <- NULL

      }

      return(invisible(NULL))

    },

    # Initialize will create a starter model.
    #
    # modelfile           : a character path to a model file, a booster handle
    #                       ('lgb.Booster.handle' / external pointer), or a
    #                       Booster object (anything accepted by .is_Booster()).
    # params              : list of parameters, serialized with .params2str().
    # fast_predict_config : stored as-is; consulted by predict() for
    #                       single-row inputs.
    #
    # Raises an error when 'modelfile' is none of the supported types.
    initialize = function(modelfile, params = list(), fast_predict_config = list()) {
      private$params <- .params2str(params = params)
      handle <- NULL

      if (is.character(modelfile)) {

        # Create handle on it; this object owns the handle and must free it
        handle <- .Call(
          LGBM_BoosterCreateFromModelfile_R
          , path.expand(modelfile)
        )
        private$need_free_handle <- TRUE

      } else if (methods::is(modelfile, "lgb.Booster.handle") || inherits(modelfile, "externalptr")) {

        # Model file is a booster handle already; it is borrowed, not owned
        handle <- modelfile
        private$need_free_handle <- FALSE

      } else if (.is_Booster(modelfile)) {

        # Borrow the handle of an existing Booster object
        handle <- modelfile$get_handle()
        private$need_free_handle <- FALSE

      } else {

        stop("lgb.Predictor: modelfile must be either a character filename or an lgb.Booster.handle")

      }

      private$fast_predict_config <- fast_predict_config

      # Override class and store it
      class(handle) <- "lgb.Booster.handle"
      private$handle <- handle

      return(invisible(NULL))

    },

    # Get current iteration.
    # NOTE(review): the result is read back from 'cur_iter' after the native
    # call, so this presumably relies on the call filling it in place - confirm.
    current_iter = function() {

      cur_iter <- 0L
      .Call(
        LGBM_BoosterGetCurrentIteration_R
        , private$handle
        , cur_iter
      )
      return(cur_iter)

    },

    # Predict from data.
    #
    # data            : a single character file path, a base matrix, or a
    #                   'dsparseVector' / 'dgRMatrix' / 'dgCMatrix'.
    # start_iteration : NULL is replaced by 0L.
    # num_iteration   : NULL is replaced by -1L.
    # rawscore, predleaf, predcontrib : flags forwarded to the native
    #                   prediction routines.
    # header          : forwarded to the file-based prediction routine only.
    #
    # Returns a numeric vector, or a matrix (one row per input row) when there
    # is more than one prediction per row or when 'predleaf' / 'predcontrib'
    # is set. With 'predcontrib' and a sparse input, returns a sparse object
    # of the same class as the input.
    predict = function(data,
                       start_iteration = NULL,
                       num_iteration = NULL,
                       rawscore = FALSE,
                       predleaf = FALSE,
                       predcontrib = FALSE,
                       header = FALSE) {

      # Check if number of iterations is existing - if not, then set it to -1 (use all)
      if (is.null(num_iteration)) {
        num_iteration <- -1L
      }
      # Check if start iterations is existing - if not, then set it to 0 (start from the first iteration)
      if (is.null(start_iteration)) {
        start_iteration <- 0L
      }

      # Check if data is a file name and not a matrix
      if (identical(class(data), "character") && length(data) == 1L) {

        data <- path.expand(data)

        # Data is a filename, create a temporary file with a "lightgbm_" pattern in it
        tmp_filename <- tempfile(pattern = "lightgbm_")
        on.exit(unlink(tmp_filename), add = TRUE)

        # Predict from file, writing the results into the temporary file
        .Call(
          LGBM_BoosterPredictForFile_R
          , private$handle
          , data
          , as.integer(header)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , as.integer(start_iteration)
          , as.integer(num_iteration)
          , private$params
          , tmp_filename
        )

        # Get predictions from file and flatten them row by row
        preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
        num_row <- nrow(preds)
        preds <- as.vector(t(preds))

      } else if (predcontrib && inherits(data, c("dsparseMatrix", "dsparseVector"))) {

        ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
        num_class <- integer(1L)
        .Call(LGBM_BoosterGetNumClasses_R, private$handle, num_class)

        # Width of the contributions output: (features + 1) per class.
        # Computed in double precision so that a product beyond the integer
        # range yields a usable number instead of NA.
        ncols_out <- (as.numeric(ncols) + 1.0) * as.numeric(max(num_class, 1L))
        if (!inherits(data, "dsparseVector") && ncols_out > .Machine$integer.max) {
          stop("Resulting matrix of feature contributions is too large for R to handle.")
        }
        # Keep an integer whenever the value fits
        if (ncols_out <= .Machine$integer.max) {
          ncols_out <- as.integer(ncols_out)
        }

        if (inherits(data, "dsparseVector")) {

          if (length(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , length(data)))
          }
          # A sparse vector is passed as a one-row CSR input; its indices are
          # shifted by one before the call and shifted back afterwards.
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , c(0L, as.integer(length(data@x)))
            , data@i - 1L
            , data@x
            , TRUE
            , 1L
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dsparseVector")
          out@i <- res$indices + 1L
          out@x <- res$data
          out@length <- ncols_out
          return(out)

        } else if (inherits(data, "dgRMatrix")) {

          if (ncol(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@j
            , data@x
            , TRUE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgRMatrix")
          out@p <- res$indptr
          out@j <- res$indices
          out@x <- res$data
          out@Dim <- as.integer(c(nrow(data), ncols_out))

        } else if (inherits(data, "dgCMatrix")) {

          # Unlike the row-compressed case, the column count must match exactly
          if (ncol(data) != ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , FALSE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgCMatrix")
          out@p <- res$indptr
          out@i <- res$indices
          out@x <- res$data
          # Number of output columns is taken from the returned column pointers
          out@Dim <- as.integer(c(nrow(data), length(res$indptr) - 1L))

        } else {

          stop(sprintf("Predictions on sparse inputs are only allowed for '%s', '%s', '%s' - got: %s"
                       , "dsparseVector"
                       , "dgRMatrix"
                       , "dgCMatrix"
                       , toString(class(data))))

        }

        # Keep row names if the input has any
        if (NROW(row.names(data))) {
          out@Dimnames[[1L]] <- row.names(data)
        }
        return(out)

      } else {

        # Not a file, we need to predict from R object
        num_row <- nrow(data)
        if (is.null(num_row)) {
          # Inputs without a row dimension are treated as a single row
          num_row <- 1L
        }

        npred <- 0L

        # Check number of predictions to do
        .Call(
          LGBM_BoosterCalcNumPredict_R
          , private$handle
          , as.integer(num_row)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , as.integer(start_iteration)
          , as.integer(num_iteration)
          , npred
        )

        # Pre-allocate empty vector
        preds <- numeric(npred)

        # Check if data is a matrix
        if (is.matrix(data)) {
          # this if() prevents the memory and computational costs
          # of converting something that is already "double" to "double"
          if (storage.mode(data) != "double") {
            storage.mode(data) <- "double"
          }

          if (nrow(data) == 1L) {

            use_fast_config <- private$check_can_use_fast_predict_config(
              csr = FALSE
              , rawscore = rawscore
              , predleaf = predleaf
              , predcontrib = predcontrib
              , start_iteration = start_iteration
              , num_iteration = num_iteration
            )

            if (use_fast_config) {
              .Call(
                LGBM_BoosterPredictForMatSingleRowFast_R
                , private$fast_predict_config$handle
                , data
                , preds
              )
            } else {
              .Call(
                LGBM_BoosterPredictForMatSingleRow_R
                , private$handle
                , data
                , rawscore
                , predleaf
                , predcontrib
                , start_iteration
                , num_iteration
                , private$params
                , preds
              )
            }

          } else {
            .Call(
              LGBM_BoosterPredictForMat_R
              , private$handle
              , data
              , as.integer(nrow(data))
              , as.integer(ncol(data))
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , as.integer(start_iteration)
              , as.integer(num_iteration)
              , private$params
              , preds
            )
          }

        } else if (inherits(data, "dsparseVector")) {

          # The fast-predict configuration is a private field. Use the column
          # count stored with it when present, otherwise ask the booster.
          ncols <- private$fast_predict_config[["ncols"]]
          if (is.null(ncols)) {
            ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
          }

          # Returns FALSE when no configuration is stored
          use_fast_config <- private$check_can_use_fast_predict_config(
            csr = TRUE
            , rawscore = rawscore
            , predleaf = predleaf
            , predcontrib = predcontrib
            , start_iteration = start_iteration
            , num_iteration = num_iteration
          )

          if (length(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , length(data)))
          }

          if (use_fast_config) {
            .Call(
              LGBM_BoosterPredictForCSRSingleRowFast_R
              , private$fast_predict_config$handle
              , data@i - 1L
              , data@x
              , preds
            )
          } else {
            .Call(
              LGBM_BoosterPredictForCSRSingleRow_R
              , private$handle
              , data@i - 1L
              , data@x
              , ncols
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , start_iteration
              , num_iteration
              , private$params
              , preds
            )
          }

        } else if (inherits(data, "dgRMatrix")) {

          ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
          if (ncol(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }

          if (nrow(data) == 1L) {

            # Returns FALSE when no configuration is stored
            use_fast_config <- private$check_can_use_fast_predict_config(
              csr = TRUE
              , rawscore = rawscore
              , predleaf = predleaf
              , predcontrib = predcontrib
              , start_iteration = start_iteration
              , num_iteration = num_iteration
            )

            if (use_fast_config) {
              .Call(
                LGBM_BoosterPredictForCSRSingleRowFast_R
                , private$fast_predict_config$handle
                , data@j
                , data@x
                , preds
              )
            } else {
              .Call(
                LGBM_BoosterPredictForCSRSingleRow_R
                , private$handle
                , data@j
                , data@x
                , ncols
                , as.integer(rawscore)
                , as.integer(predleaf)
                , as.integer(predcontrib)
                , start_iteration
                , num_iteration
                , private$params
                , preds
              )
            }

          } else {

            .Call(
              LGBM_BoosterPredictForCSR_R
              , private$handle
              , data@p
              , data@j
              , data@x
              , ncols
              , as.integer(rawscore)
              , as.integer(predleaf)
              , as.integer(predcontrib)
              , start_iteration
              , num_iteration
              , private$params
              , preds
            )

          }

        } else if (methods::is(data, "dgCMatrix")) {
          if (length(data@p) > 2147483647L) {
            stop("Cannot support large CSC matrix")
          }
          # Data is a dgCMatrix (sparse matrix, column compressed format)
          .Call(
            LGBM_BoosterPredictForCSC_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , length(data@p)
            , length(data@x)
            , nrow(data)
            , as.integer(rawscore)
            , as.integer(predleaf)
            , as.integer(predcontrib)
            , as.integer(start_iteration)
            , as.integer(num_iteration)
            , private$params
            , preds
          )

        } else {

          # toString() keeps the message readable for objects with several classes
          stop("predict: cannot predict on data of class ", sQuote(toString(class(data))))

        }
      }

      # Check if number of rows is strange (not a multiple of the dataset rows)
      if (length(preds) %% num_row != 0L) {
        stop(
          "predict: prediction length "
          , sQuote(length(preds))
          , " is not a multiple of nrows(data): "
          , sQuote(num_row)
        )
      }

      # Get number of cases per row
      npred_per_case <- length(preds) / num_row

      # Data reshaping: one row per input row
      if (npred_per_case > 1L || predleaf || predcontrib) {
        preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
      }

      # Keep row names if possible
      if (NROW(row.names(data)) && NROW(data) == NROW(preds)) {
        if (is.null(dim(preds))) {
          names(preds) <- row.names(data)
        } else {
          row.names(preds) <- row.names(data)
        }
      }

      return(preds)
    }

  ),
  private = list(
    # Booster handle used by all native calls
    handle = NULL
    # TRUE only when the handle was created by this object (see initialize)
    , need_free_handle = FALSE
    # Parameters serialized by .params2str()
    , params = ""
    # Configuration supplied at construction; empty list when none is given
    , fast_predict_config = list()

    # Decide whether the stored fast-predict configuration can serve a
    # single-row prediction with the given settings.
    #
    # Returns FALSE when no configuration is stored, when its handle is null
    # (a warning is raised in that case), or when its 'csr' flag differs from
    # the requested one. Otherwise returns TRUE only if no extra parameters are
    # set and every prediction setting equals the stored one.
    , check_can_use_fast_predict_config = function(csr,
                                                   rawscore,
                                                   predleaf,
                                                   predcontrib,
                                                   start_iteration,
                                                   num_iteration) {

      if (!NROW(private$fast_predict_config)) {
        return(FALSE)
      }

      if (.is_null_handle(private$fast_predict_config$handle)) {
        warning(paste0("Model had fast CSR predict configuration, but it is inactive."
                       , " Try re-generating it through 'lgb.configure_fast_predict'."))
        return(FALSE)
      }

      if (isTRUE(csr) != private$fast_predict_config$csr) {
        return(FALSE)
      }

      return(
        private$params == "" &&
        private$fast_predict_config$rawscore == rawscore &&
        private$fast_predict_config$predleaf == predleaf &&
        private$fast_predict_config$predcontrib == predcontrib &&
        .equal_or_both_null(private$fast_predict_config$start_iteration, start_iteration) &&
        .equal_or_both_null(private$fast_predict_config$num_iteration, num_iteration)
      )
    }
  )
)