#' @importFrom methods is new
#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix
#' @importFrom R6 R6Class
#' @importFrom utils read.delim
# Predictor: R6 wrapper around a native booster handle that is used to
# produce predictions from a file, a dense matrix or a sparse Matrix object.
#
# Private state:
#   handle            the booster handle (classed "lgb.Booster.handle")
#   need_free_handle  TRUE only when this object created the handle itself
#                     (i.e. loaded it from a model file) and must release it
#   params            parameter string produced by lgb.params2str()
Predictor <- R6::R6Class(

  classname = "lgb.Predictor",
  cloneable = FALSE,
  public = list(

    # Release the native handle, but only if this object created it.
    # Handles that were passed in by the caller are left untouched.
    # Always returns invisible(NULL).
    finalize = function() {

      if (private$need_free_handle) {

        .Call(
          LGBM_BoosterFree_R
          , private$handle
        )
        private$handle <- NULL

        # make a repeated call a no-op instead of freeing a NULL handle
        private$need_free_handle <- FALSE

      }

      return(invisible(NULL))

    },

    # Build a predictor.
    #
    # modelfile  one of:
    #              * a character path to a model file (a new handle is
    #                created and owned by this object),
    #              * an "lgb.Booster.handle" / external pointer (borrowed),
    #              * an lgb.Booster object (its handle is borrowed).
    # params     list of parameters, serialised with lgb.params2str().
    #
    # Stops with an error for any other type of `modelfile`.
    # Returns invisible(NULL).
    initialize = function(modelfile, params = list()) {
      private$params <- lgb.params2str(params = params)
      handle <- NULL

      if (is.character(modelfile)) {

        # Load the model from disk; this object now owns the handle
        handle <- .Call(
          LGBM_BoosterCreateFromModelfile_R
          , path.expand(modelfile)
        )
        private$need_free_handle <- TRUE

      } else if (
        methods::is(modelfile, "lgb.Booster.handle")
        || inherits(modelfile, "externalptr")
      ) {

        # Already a handle: borrow it, the caller stays responsible for it
        handle <- modelfile
        private$need_free_handle <- FALSE

      } else if (lgb.is.Booster(modelfile)) {

        # Booster object: borrow its handle
        handle <- modelfile$get_handle()
        private$need_free_handle <- FALSE

      } else {

        stop("lgb.Predictor: modelfile must be either a character filename or an lgb.Booster.handle")

      }

      # Override class and store it
      class(handle) <- "lgb.Booster.handle"
      private$handle <- handle

      return(invisible(NULL))

    },

    # Return the current iteration of the underlying booster.
    # `cur_iter` is handed to the native routine and returned afterwards;
    # the routine's own return value is not used, so it is expected to
    # fill `cur_iter` in place.
    current_iter = function() {

      cur_iter <- 0L
      .Call(
        LGBM_BoosterGetCurrentIteration_R
        , private$handle
        , cur_iter
      )
      return(cur_iter)

    },

    # Predict from data.
    #
    # data             a single character string (path to a data file), a
    #                  base R matrix or a dgCMatrix. When `predcontrib` is
    #                  TRUE, dsparseVector / dgRMatrix / dgCMatrix inputs take
    #                  a dedicated path that returns a sparse object of the
    #                  same class.
    # start_iteration  first iteration to use; NULL means 0.
    # num_iteration    number of iterations to use; NULL means -1 (all).
    # rawscore, predleaf, predcontrib
    #                  flags forwarded (as integers) to the native routines.
    # header           forwarded (as integer) for file input only.
    #
    # Returns a numeric vector, or a matrix with one row per observation
    # when there is more than one value per row or when `predleaf` /
    # `predcontrib` is set. Row names of `data` are carried over when the
    # number of rows matches.
    predict = function(data,
                       start_iteration = NULL,
                       num_iteration = NULL,
                       rawscore = FALSE,
                       predleaf = FALSE,
                       predcontrib = FALSE,
                       header = FALSE) {

      # Number of iterations not given: use all of them (-1)
      if (is.null(num_iteration)) {
        num_iteration <- -1L
      }
      # Start iteration not given: start from the first iteration (0)
      if (is.null(start_iteration)) {
        start_iteration <- 0L
      }

      # Coerce once so that every native call below, including the sparse
      # ones, receives integers
      start_iteration <- as.integer(start_iteration)
      num_iteration <- as.integer(num_iteration)

      # Check if data is a file name and not a matrix
      if (identical(class(data), "character") && length(data) == 1L) {

        data <- path.expand(data)

        # The native routine writes its predictions to a temporary file
        # with a "lightgbm_" pattern in its name; remove it on exit
        tmp_filename <- tempfile(pattern = "lightgbm_")
        on.exit(unlink(tmp_filename), add = TRUE)

        .Call(
          LGBM_BoosterPredictForFile_R
          , private$handle
          , data
          , as.integer(header)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , start_iteration
          , num_iteration
          , private$params
          , tmp_filename
        )

        # Read the predictions back and flatten them row by row
        preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
        num_row <- nrow(preds)
        preds <- as.vector(t(preds))

      } else if (predcontrib && inherits(data, c("dsparseMatrix", "dsparseVector"))) {

        ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)

        # The class count is kept in its own variable: reusing `ncols_out`
        # for both the count and the product made the overflow fallback
        # operate on an NA and therefore never recover.
        num_class <- integer(1L)
        .Call(LGBM_BoosterGetNumClasses_R, private$handle, num_class)
        num_class <- max(num_class, 1L)

        # Output width is (features + 1) per class. Computed in double
        # precision so that it cannot overflow to NA, then converted back
        # to integer whenever it fits.
        ncols_out <- (as.numeric(ncols) + 1.0) * as.numeric(num_class)
        if (ncols_out <= .Machine$integer.max) {
          ncols_out <- as.integer(ncols_out)
        }
        if (!inherits(data, "dsparseVector") && ncols_out > .Machine$integer.max) {
          stop("Resulting matrix of feature contributions is too large for R to handle.")
        }

        if (inherits(data, "dsparseVector")) {

          if (length(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , length(data)))
          }
          # A sparse vector is passed as a single row; its indices are
          # shifted by one (`data@i - 1L`) on the way in and back on the
          # way out
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , c(0L, as.integer(length(data@x)))
            , data@i - 1L
            , data@x
            , TRUE
            , 1L
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dsparseVector")
          out@i <- res$indices + 1L
          out@x <- res$data
          out@length <- ncols_out
          return(out)

        } else if (inherits(data, "dgRMatrix")) {

          if (ncol(data) > ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@j
            , data@x
            , TRUE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgRMatrix")
          out@p <- res$indptr
          out@j <- res$indices
          out@x <- res$data
          out@Dim <- as.integer(c(nrow(data), ncols_out))

        } else if (inherits(data, "dgCMatrix")) {

          # Unlike the row-oriented inputs above, the column count must
          # match exactly here
          if (ncol(data) != ncols) {
            stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
                         , ncols
                         , ncol(data)))
          }
          res <- .Call(
            LGBM_BoosterPredictSparseOutput_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , FALSE
            , nrow(data)
            , ncols
            , start_iteration
            , num_iteration
            , private$params
          )
          out <- methods::new("dgCMatrix")
          out@p <- res$indptr
          out@i <- res$indices
          out@x <- res$data
          out@Dim <- as.integer(c(nrow(data), length(res$indptr) - 1L))

        } else {

          stop(sprintf("Predictions on sparse inputs are only allowed for '%s', '%s', '%s' - got: %s"
                       , "dsparseVector"
                       , "dgRMatrix"
                       , "dgCMatrix"
                       , toString(class(data))))

        }

        # Keep row names if the input has any
        if (NROW(row.names(data))) {
          out@Dimnames[[1L]] <- row.names(data)
        }
        return(out)

      } else {

        # Not a file, we need to predict from an R object.
        # Validate the type first so that native code is never called with
        # an object it cannot handle (nrow() is NULL for plain vectors).
        is_dense <- is.matrix(data)
        is_csc <- methods::is(data, "dgCMatrix")
        if (!is_dense && !is_csc) {
          stop("predict: cannot predict on data of class ", toString(sQuote(class(data))))
        }

        num_row <- nrow(data)

        # Number of predictions to produce. `npred` is used right after the
        # call without being reassigned, so the native routine is expected
        # to fill it in place.
        npred <- 0L
        .Call(
          LGBM_BoosterCalcNumPredict_R
          , private$handle
          , as.integer(num_row)
          , as.integer(rawscore)
          , as.integer(predleaf)
          , as.integer(predcontrib)
          , start_iteration
          , num_iteration
          , npred
        )

        # Pre-allocate the output vector handed to the native routine
        preds <- numeric(npred)

        if (is_dense) {

          # this if() prevents the memory and computational costs
          # of converting something that is already "double" to "double"
          if (storage.mode(data) != "double") {
            storage.mode(data) <- "double"
          }
          .Call(
            LGBM_BoosterPredictForMat_R
            , private$handle
            , data
            , as.integer(nrow(data))
            , as.integer(ncol(data))
            , as.integer(rawscore)
            , as.integer(predleaf)
            , as.integer(predcontrib)
            , start_iteration
            , num_iteration
            , private$params
            , preds
          )

        } else {

          # dgCMatrix (sparse matrix, column compressed format)
          if (length(data@p) > 2147483647L) {
            stop("Cannot support large CSC matrix")
          }
          .Call(
            LGBM_BoosterPredictForCSC_R
            , private$handle
            , data@p
            , data@i
            , data@x
            , length(data@p)
            , length(data@x)
            , nrow(data)
            , as.integer(rawscore)
            , as.integer(predleaf)
            , as.integer(predcontrib)
            , start_iteration
            , num_iteration
            , private$params
            , preds
          )

        }
      }

      # Check if number of rows is strange (not a multiple of the dataset rows)
      if (length(preds) %% num_row != 0L) {
        stop(
          "predict: prediction length "
          , sQuote(length(preds))
          , " is not a multiple of nrows(data): "
          , sQuote(num_row)
        )
      }

      # Number of predicted values per input row
      npred_per_case <- length(preds) / num_row

      # Reshape to one row per observation when there are several values
      # per row, or when leaf indices / contributions were requested
      if (npred_per_case > 1L || predleaf || predcontrib) {
        preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
      }

      # Keep row names if possible
      if (NROW(row.names(data)) && NROW(data) == NROW(preds)) {
        if (is.null(dim(preds))) {
          names(preds) <- row.names(data)
        } else {
          row.names(preds) <- row.names(data)
        }
      }

      return(preds)
    }

  ),
  private = list(
    handle = NULL
    , need_free_handle = FALSE
    , params = ""
  )
)