lgb.Dataset.R 29.8 KB
Newer Older
James Lamb's avatar
James Lamb committed
1
#' @importFrom methods is
James Lamb's avatar
James Lamb committed
2
3
4
#' @importFrom R6 R6Class
Dataset <- R6::R6Class(

5
  classname = "lgb.Dataset",
6
  cloneable = FALSE,
Guolin Ke's avatar
Guolin Ke committed
7
  public = list(
James Lamb's avatar
James Lamb committed
8

9
    # Finalize will free up the handles
Guolin Ke's avatar
Guolin Ke committed
10
    finalize = function() {

      # Without a live handle there is nothing to hand back to the native side
      if (lgb.is.null.handle(private$handle)) {
        return(invisible(NULL))
      }

      # Ask the native library to free the Dataset, then forget the handle so
      # that later calls see the object as "not constructed"
      lgb.call("LGBM_DatasetFree_R", ret = NULL, private$handle)
      private$handle <- NULL

    },
James Lamb's avatar
James Lamb committed
22

23
    # Initialize will create a starter dataset
Guolin Ke's avatar
Guolin Ke committed
24
    initialize = function(data,
                          params = list(),
                          reference = NULL,
                          colnames = NULL,
                          categorical_feature = NULL,
                          predictor = NULL,
                          free_raw_data = TRUE,
                          used_indices = NULL,
                          info = list(),
                          ...) {

      # Stores the settings for a Dataset; the native handle is only created
      # later by construct().
      #
      # Named arguments arriving through `...` are routed by name: the four
      # known info fields go into `info`, anything else is treated as a
      # parameter and goes into `params`.
      extra_args <- list(...)
      info_fields <- c("label", "weight", "init_score", "group")

      for (arg_name in names(extra_args)) {
        arg_value <- extra_args[[arg_name]]
        if (arg_name %in% info_fields) {
          info[[arg_name]] <- arg_value
        } else {
          params[[arg_name]] <- arg_value
        }
      }

      # A reference, when given, has to be another lgb.Dataset
      if (!is.null(reference) && !lgb.check.r6.class(reference, "lgb.Dataset")) {
        stop("lgb.Dataset: Can only use ", sQuote("lgb.Dataset"), " as reference")
      }

      # A predictor, when given, has to be an lgb.Predictor
      if (!is.null(predictor) && !lgb.check.r6.class(predictor, "lgb.Predictor")) {
        stop("lgb.Dataset: Only can use ", sQuote("lgb.Predictor"), " as predictor")
      }

      # Dense matrices are stored as "double" (e.g. integer input is converted)
      if (is.matrix(data) && storage.mode(data) != "double") {
        storage.mode(data) <- "double"
      }

      # Keep everything on the private side of the object
      private$raw_data <- data
      private$params <- params
      private$reference <- reference
      private$colnames <- colnames
      private$categorical_feature <- categorical_feature
      private$predictor <- predictor
      private$free_raw_data <- free_raw_data
      private$used_indices <- used_indices
      private$info <- info

    },
James Lamb's avatar
James Lamb committed
94

95
96
97
    create_valid = function(data,
                            info = list(),
                            ...) {

      # Builds a new Dataset from `data` that reuses this object's settings
      # and points back to this object as its reference. Arguments are passed
      # positionally, in the order initialize() declares them.
      valid_set <- Dataset$new(
        data,
        private$params,
        self,                          # reference
        private$colnames,
        private$categorical_feature,
        private$predictor,
        private$free_raw_data,
        NULL,                          # used_indices: not a subset
        info,
        ...
      )

      # Returned invisibly
      invisible(valid_set)

    },
James Lamb's avatar
James Lamb committed
115

116
    # Dataset constructor
Guolin Ke's avatar
Guolin Ke committed
117
    construct = function() {

      # Creates the native Dataset handle from the stored settings.
      #
      # Returns `self` invisibly. Does nothing when a handle already exists.
      # Stops with an error when: a categorical feature is unknown or out of
      # range, the raw data has an unsupported type, a subset is requested
      # without a reference, the native call yields a null handle, or no
      # label is available once construction has finished.

      # Nothing to do when the native Dataset has already been created
      if (!lgb.is.null.handle(private$handle)) {
        return(invisible(self))
      }

      # In-memory inputs are a dense matrix or a column-compressed sparse matrix
      is_matrix_like <- is.matrix(private$raw_data) ||
        methods::is(private$raw_data, "dgCMatrix")

      # Fall back to the raw data's own column names when none were supplied
      if (is.null(private$colnames) && is_matrix_like) {
        cnames <- colnames(private$raw_data)
        if (!is.null(cnames)) {
          private$colnames <- as.character(cnames)
        }
      }

      # Translate categorical features into 0-based indices
      if (!is.null(private$categorical_feature)) {

        if (is.character(private$categorical_feature)) {

          # Names are resolved against the known column names (1-based)
          matched <- match(private$categorical_feature, private$colnames)

          if (anyNA(matched)) {
            # List every unknown name, comma-separated
            stop(
              "lgb.self.get.handle: supplied an unknown feature in categorical_feature: ",
              paste0(sQuote(private$categorical_feature[is.na(matched)]), collapse = ", ")
            )
          }

          cate_indices <- as.list(matched - 1)

        } else {

          # Number of features to validate against: the column names when
          # known, otherwise the width of the in-memory data. For inputs
          # where neither is available (e.g. a file name) the count is
          # unknown and the range check is skipped.
          num_features <- if (!is.null(private$colnames)) {
            length(private$colnames)
          } else if (is_matrix_like) {
            ncol(private$raw_data)
          } else {
            NA_integer_
          }

          if (!is.na(num_features) && max(private$categorical_feature) > num_features) {
            stop(
              "lgb.self.get.handle: supplied a too large value in categorical_feature: ",
              max(private$categorical_feature),
              " but only ",
              num_features,
              " features"
            )
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          cate_indices <- as.list(private$categorical_feature - 1)

        }

        private$params$categorical_feature <- cate_indices

      }

      # `header` / `has_header` need no handling here: they are part of
      # private$params and therefore part of the parameter string below.
      params_str <- lgb.params2str(private$params)

      # Handle of the reference dataset (constructed on demand by get_handle)
      ref_handle <- NULL
      if (!is.null(private$reference)) {
        ref_handle <- private$reference$.__enclos_env__$private$get_handle()
      }
      handle <- NA_real_

      if (is.null(private$used_indices)) {

        # Full dataset: dispatch on the kind of raw data
        if (is.character(private$raw_data)) {

          # A file name
          handle <- lgb.call("LGBM_DatasetCreateFromFile_R",
                             ret = handle,
                             lgb.c_str(private$raw_data),
                             params_str,
                             ref_handle)

        } else if (is.matrix(private$raw_data)) {

          # A dense matrix
          handle <- lgb.call("LGBM_DatasetCreateFromMat_R",
                             ret = handle,
                             private$raw_data,
                             nrow(private$raw_data),
                             ncol(private$raw_data),
                             params_str,
                             ref_handle)

        } else if (methods::is(private$raw_data, "dgCMatrix")) {

          # The column pointer length is passed on as a count; refuse
          # anything above the 32-bit signed integer maximum
          if (length(private$raw_data@p) > 2147483647) {
            stop("Cannot support large CSC matrix")
          }

          # A dgCMatrix (sparse matrix, column compressed)
          handle <- lgb.call("LGBM_DatasetCreateFromCSC_R",
                             ret = handle,
                             private$raw_data@p,
                             private$raw_data@i,
                             private$raw_data@x,
                             length(private$raw_data@p),
                             length(private$raw_data@x),
                             nrow(private$raw_data),
                             params_str,
                             ref_handle)

        } else {

          # Unknown data type
          stop("lgb.Dataset.construct: does not support constructing from ", sQuote(class(private$raw_data)))

        }

      } else {

        # Subset of an existing dataset: a reference is mandatory
        if (is.null(private$reference)) {
          stop("lgb.Dataset.construct: reference cannot be NULL for constructing data subset")
        }

        handle <- lgb.call("LGBM_DatasetGetSubset_R",
                           ret = handle,
                           ref_handle,
                           c(private$used_indices), # Adding c() fixes issue in R v3.5
                           length(private$used_indices),
                           params_str)

      }

      if (lgb.is.null.handle(handle)) {
        stop("lgb.Dataset.construct: cannot create Dataset handle")
      }

      # Tag the handle and keep it
      class(handle) <- "lgb.Dataset.handle"
      private$handle <- handle

      # Push feature names to the native Dataset
      if (!is.null(private$colnames)) {
        self$set_colnames(private$colnames)
      }

      # Initial scores from the predictor (full datasets only; must happen
      # before the raw data is released below)
      if (!is.null(private$predictor) && is.null(private$used_indices)) {

        init_score <- private$predictor$predict(private$raw_data, rawscore = TRUE, reshape = TRUE)

        # as.vector() flattens in column-major order, so no transpose is done
        init_score <- as.vector(init_score)
        private$info$init_score <- init_score

      }

      # Release the raw data when requested
      if (isTRUE(private$free_raw_data)) {
        private$raw_data <- NULL
      }

      # Push every stored info field to the native Dataset
      if (length(private$info) > 0) {

        for (i in seq_along(private$info)) {

          p <- private$info[i]
          self$setinfo(names(p), p[[1]])

        }

      }

      # A label must be available at this point
      if (is.null(self$getinfo("label"))) {
        stop("lgb.Dataset.construct: label should be set")
      }

      # Return self
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
296

297
    # Dimension function
Guolin Ke's avatar
Guolin Ke committed
298
    dim = function() {

      # Returns c(rows, columns): from the native Dataset when a handle
      # exists, otherwise from the in-memory raw data.

      if (!lgb.is.null.handle(private$handle)) {

        n_rows <- 0L
        n_cols <- 0L

        # Ask the native Dataset for its row and feature counts
        n_rows <- lgb.call("LGBM_DatasetGetNumData_R", ret = n_rows, private$handle)
        n_cols <- lgb.call("LGBM_DatasetGetNumFeature_R", ret = n_cols, private$handle)
        return(c(n_rows, n_cols))

      }

      # Not constructed yet: matrices and dgCMatrix objects know their own size
      # NOTE: dgCMatrix requires the Matrix package
      if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
        return(dim(private$raw_data))
      }

      # Any other input has no known dimensions before construction
      stop("dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly")

    },
James Lamb's avatar
James Lamb committed
324

325
    # Get column names
Guolin Ke's avatar
Guolin Ke committed
326
    get_colnames = function() {

      # Returns the feature names as a character vector.
      #
      # With a handle, the names are read from the native Dataset (as one
      # tab-separated string) and cached in private$colnames. Without a
      # handle, the column names of an in-memory matrix / dgCMatrix are
      # returned. For any other input an error is raised.

      if (!lgb.is.null.handle(private$handle)) {

        # Get feature names and write them
        cnames <- lgb.call.return.str("LGBM_DatasetGetFeatureNames_R", private$handle)
        private$colnames <- as.character(base::strsplit(cnames, "\t")[[1]])
        private$colnames

      } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {

        # Dense matrix or dgCMatrix (sparse matrix column compressed)
        colnames(private$raw_data)

      } else {

        # Column names of other inputs are unknown before construction
        stop("get_colnames: cannot get column names before dataset has been constructed, please call lgb.Dataset.construct explicitly")

      }

    },
James Lamb's avatar
James Lamb committed
349

350
    # Set column names
Guolin Ke's avatar
Guolin Ke committed
351
    set_colnames = function(colnames) {

      # Stores the feature names and, when a handle exists, forwards them to
      # the native Dataset. Returns `self` invisibly.

      # NULL and zero-length input are both ignored: as.character(NULL) is
      # character(0), so a single length check covers the two cases
      colnames <- as.character(colnames)
      if (length(colnames) == 0) {
        return(invisible(self))
      }

      private$colnames <- colnames

      if (!lgb.is.null.handle(private$handle)) {

        # The native call takes all names as one tab-separated string
        joined_names <- paste0(as.list(private$colnames), collapse = "\t")
        lgb.call("LGBM_DatasetSetFeatureNames_R",
                 ret = NULL,
                 private$handle,
                 lgb.c_str(joined_names))

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
381

382
    # Get information
Guolin Ke's avatar
Guolin Ke committed
383
    getinfo = function(name) {

      # Returns one info field ("label", "weight", "init_score" or "group").
      # A locally stored value wins; otherwise the field is read from the
      # native Dataset and cached. Returns NULL when the field is empty.

      known_fields <- c("label", "weight", "init_score", "group")

      # `name` must be a single string naming a known field
      if (!(is.character(name) && length(name) == 1 && name %in% known_fields)) {
        stop("getinfo: name must one of the following: ", paste0(sQuote(known_fields), collapse = ", "))
      }

      # Already stored on the R side
      stored <- private$info[[name]]
      if (!is.null(stored)) {
        return(stored)
      }

      # Reading from the native side needs a constructed Dataset
      if (lgb.is.null.handle(private$handle)) {
        stop("Cannot perform getinfo before constructing Dataset.")
      }

      # Size of the field in the native Dataset
      field_len <- 0L
      field_len <- lgb.call("LGBM_DatasetGetFieldSize_R",
                            ret = field_len,
                            private$handle,
                            lgb.c_str(name))

      if (field_len > 0) {

        # "group" is an integer field, every other field is numeric
        buffer <- if (name == "group") {
          integer(field_len)
        } else {
          numeric(field_len)
        }

        buffer <- lgb.call("LGBM_DatasetGetField_R",
                           ret = buffer,
                           private$handle,
                           lgb.c_str(name))

        private$info[[name]] <- buffer

      }

      private$info[[name]]

    },
James Lamb's avatar
James Lamb committed
431

432
    # Set information
Guolin Ke's avatar
Guolin Ke committed
433
    setinfo = function(name, info) {

      # Stores one info field ("label", "weight", "init_score" or "group")
      # and, when a handle exists, forwards it to the native Dataset.
      # Returns `self` invisibly.

      known_fields <- c("label", "weight", "init_score", "group")

      # `name` must be a single string naming a known field
      if (!(is.character(name) && length(name) == 1 && name %in% known_fields)) {
        stop("setinfo: name must one of the following: ", paste0(sQuote(known_fields), collapse = ", "))
      }

      # "group" is stored as integer, every other field as numeric
      if (name == "group") {
        info <- as.integer(info)
      } else {
        info <- as.numeric(info)
      }

      # Keep the value on the R side
      private$info[[name]] <- info

      # After the coercion above `info` is never NULL, so only its length
      # decides whether there is anything to send
      if (!lgb.is.null.handle(private$handle) && length(info) > 0) {

        lgb.call("LGBM_DatasetSetField_R",
                 ret = NULL,
                 private$handle,
                 lgb.c_str(name),
                 info,
                 length(info))

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
472

473
    # Slice dataset
Guolin Ke's avatar
Guolin Ke committed
474
    slice = function(idxset, ...) {

      # Builds a Dataset describing the rows `idxset` of this one. No raw
      # data is passed; the new object references this Dataset and carries
      # the indices. Arguments are positional, in initialize() order.
      subset_ds <- Dataset$new(
        NULL,                          # data
        private$params,
        self,                          # reference
        private$colnames,
        private$categorical_feature,
        private$predictor,
        private$free_raw_data,
        idxset,                        # used_indices
        NULL,                          # info
        ...
      )

      subset_ds

    },
James Lamb's avatar
James Lamb committed
489

490
    # Update parameters
491
    update_params = function(params) {

      # Applies `params` to the Dataset. Returns `self` invisibly.

      if (lgb.is.null.handle(private$handle)) {

        # Not constructed yet: merge into the stored parameter list
        private$params <- modifyList(private$params, params)

      } else {

        # Already constructed: hand the parameters to the native Dataset.
        # NOTE(review): private$params is left untouched in this branch, so
        # a later re-construction would not see `params` - confirm intended.
        lgb.call("LGBM_DatasetUpdateParam_R", ret = NULL, private$handle, lgb.params2str(params))

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
502

503
    # Set categorical feature parameter
504
    set_categorical_feature = function(categorical_feature) {

      # Replaces the categorical feature setting. Returns `self` invisibly.

      # Only act when the setting actually changes
      if (!identical(private$categorical_feature, categorical_feature)) {

        # The change drops the handle (see finalize below); without raw data
        # the Dataset could not be constructed again
        if (is.null(private$raw_data)) {
          stop("set_categorical_feature: cannot set categorical feature after freeing raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
        }

        private$categorical_feature <- categorical_feature

        # Drop the current handle
        self$finalize()

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
525

526
    # Set reference
Guolin Ke's avatar
Guolin Ke committed
527
    set_reference = function(reference) {

      # Makes `reference` the reference Dataset of this object and copies its
      # categorical features, column names and predictor. `reference` may be
      # NULL, which clears the reference without copying anything.
      # Returns `self` invisibly.

      # Validate first, so a wrong type fails with a clear message instead
      # of an error from inside the accessors used below
      if (!is.null(reference) && !lgb.check.r6.class(reference, "lgb.Dataset")) {
        stop("set_reference: Can only use lgb.Dataset as a reference")
      }

      # Take over the reference's settings
      if (!is.null(reference)) {
        self$set_categorical_feature(reference$.__enclos_env__$private$categorical_feature)
        self$set_colnames(reference$get_colnames())
        private$set_predictor(reference$.__enclos_env__$private$predictor)
      }

      # Same reference as before: nothing else to do
      if (identical(private$reference, reference)) {
        return(invisible(self))
      }

      # Changing the reference drops the handle (see finalize below);
      # without raw data the Dataset could not be constructed again
      if (is.null(private$raw_data)) {

        stop("set_reference: cannot set reference after freeing raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")

      }

      # Store reference
      private$reference <- reference

      # Drop the current handle and return self
      self$finalize()
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
565

566
    # Save binary model
Guolin Ke's avatar
Guolin Ke committed
567
    save_binary = function(fname) {

      # Saves the Dataset to `fname` through the native library.
      # Returns `self` invisibly.

      # get_handle() constructs the Dataset first when that is still needed
      dataset_handle <- private$get_handle()

      lgb.call("LGBM_DatasetSaveBinary_R",
               ret = NULL,
               dataset_handle,
               lgb.c_str(fname))

      invisible(self)

    }
James Lamb's avatar
James Lamb committed
577

Guolin Ke's avatar
Guolin Ke committed
578
579
  ),
  private = list(
580
581
582
583
584
    # Native Dataset handle; NULL until construct() has created it and again
    # after finalize()
    handle = NULL,
    # Input data as given to initialize(); set to NULL by construct() when
    # free_raw_data is TRUE
    raw_data = NULL,
    # Parameter list; turned into a parameter string in construct()
    params = list(),
    # Reference lgb.Dataset, or NULL
    reference = NULL,
    # Feature names, or NULL when unknown
    colnames = NULL,
585
    # Categorical features as column names or 1-based indices; construct()
    # converts them to 0-based indices in params$categorical_feature
    categorical_feature = NULL,
586
587
588
589
    # lgb.Predictor used by construct() to compute init_score, or NULL
    predictor = NULL,
    # When TRUE, construct() drops raw_data once the handle exists
    free_raw_data = TRUE,
    # Row indices for a subset; when non-NULL, construct() builds the Dataset
    # as a subset of `reference`
    used_indices = NULL,
    # Info fields (label, weight, init_score, group) held on the R side
    info = NULL,
James Lamb's avatar
James Lamb committed
590

591
592
    # Get handle
    get_handle = function() {

      # Returns the native handle, constructing the Dataset first when no
      # handle exists yet.
      not_constructed <- lgb.is.null.handle(private$handle)
      if (not_constructed) {
        self$construct()
      }

      return(private$handle)

    },
James Lamb's avatar
James Lamb committed
601

602
    # Set predictor
Guolin Ke's avatar
Guolin Ke committed
603
    set_predictor = function(predictor) {

      # Replaces the predictor (an lgb.Predictor or NULL).
      # Returns `self` invisibly.

      # Same predictor as before: keep everything as it is
      if (identical(private$predictor, predictor)) {
        return(invisible(self))
      }

      # The change drops the handle (see finalize below); without raw data
      # the Dataset could not be constructed again
      if (is.null(private$raw_data)) {
        stop("set_predictor: cannot set predictor after free raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
      }

      # Anything other than NULL has to be an lgb.Predictor
      if (!is.null(predictor) && !lgb.check.r6.class(predictor, "lgb.Predictor")) {
        stop("set_predictor: Can only use lgb.Predictor as predictor")
      }

      private$predictor <- predictor

      # Drop the current handle
      self$finalize()
      invisible(self)

    }
James Lamb's avatar
James Lamb committed
634

Guolin Ke's avatar
Guolin Ke committed
635
636
637
  )
)

wxchan's avatar
wxchan committed
638
#' Construct lgb.Dataset object
Guolin Ke's avatar
Guolin Ke committed
639
#'
wxchan's avatar
wxchan committed
640
#' Construct lgb.Dataset object from dense matrix, sparse matrix
Guolin Ke's avatar
Guolin Ke committed
641
642
643
644
645
646
#' or local file (that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters
#' @param reference reference dataset
#' @param colnames names of columns
647
#' @param categorical_feature categorical features
Guolin Ke's avatar
Guolin Ke committed
648
649
650
#' @param free_raw_data if \code{TRUE}, the raw data is freed after the dataset has been constructed
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info} or parameters pass to \code{params}
James Lamb's avatar
James Lamb committed
651
#'
Guolin Ke's avatar
Guolin Ke committed
652
#' @return constructed dataset
James Lamb's avatar
James Lamb committed
653
#'
Guolin Ke's avatar
Guolin Ke committed
654
#' @examples
655
656
657
658
659
660
661
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
James Lamb's avatar
James Lamb committed
662
#'
Guolin Ke's avatar
Guolin Ke committed
663
664
#' @export
lgb.Dataset <- function(data,
665
666
667
                        params = list(),
                        reference = NULL,
                        colnames = NULL,
668
                        categorical_feature = NULL,
669
670
                        free_raw_data = TRUE,
                        info = list(),
Guolin Ke's avatar
Guolin Ke committed
671
                        ...) {
James Lamb's avatar
James Lamb committed
672

673
  # Create new dataset
674
  invisible(Dataset$new(data,
675
676
677
678
679
680
681
682
              params,
              reference,
              colnames,
              categorical_feature,
              NULL,
              free_raw_data,
              NULL,
              info,
683
              ...))
James Lamb's avatar
James Lamb committed
684

Guolin Ke's avatar
Guolin Ke committed
685
686
}

#' Construct validation data
#'
#' Construct validation data according to training data
#'
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info}.
#'
#' @return constructed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#'
#' @export
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {

  # The object being validated is `dataset` (the training set). The message names
  # it explicitly so it cannot be mistaken for the separate `data` argument.
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.create.valid: input dataset should be an lgb.Dataset object")
  }

  # Delegate to the training dataset's own create_valid() method
  invisible(dataset$create_valid(data, info, ...))

}

#' Construct Dataset explicitly
#'
#' @param dataset Object of class \code{lgb.Dataset}
#'
#' @return the value returned by the dataset's \code{construct()} method, invisibly
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' @export
lgb.Dataset.construct <- function(dataset) {

  # Happy path first: a genuine lgb.Dataset is asked to construct itself
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$construct()))
  }

  # Anything else is rejected
  stop("lgb.Dataset.construct: input data should be an lgb.Dataset object")

}

#' Dimensions of an lgb.Dataset
#'
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#'
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters (not used by this method)
#'
#' @return a vector of numbers of rows and of columns
#'
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{lgb.Dataset} object.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#'
#' @rdname dim
#' @export
dim.lgb.Dataset <- function(x, ...) {

  # Reject anything that is not an lgb.Dataset before touching its methods
  is_dataset <- lgb.is.Dataset(x)
  if (!is_dataset) {
    stop("dim.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # The object reports its own dimensions
  x$dim()

}

#' Handling of column names of \code{lgb.Dataset}
#'
#' Only column names are supported for \code{lgb.Dataset}: the returned row names
#' are always \code{NULL}, and trying to set row names raises an error.
#'
#' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one (row names) must be
#'        \code{NULL} and the second one is column names
#'
#' @details
#' Generic \code{dimnames} methods are used by \code{colnames}.
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' dimnames(dtrain)
#' colnames(dtrain)
#' colnames(dtrain) <- make.names(seq_len(ncol(train$data)))
#' print(dtrain, verbose = TRUE)
#'
#' @rdname dimnames.lgb.Dataset
#' @export
dimnames.lgb.Dataset <- function(x) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(x)) {
    stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # Row names do not exist for this class, so the first slot is always NULL
  column_names <- x$get_colnames()
  list(NULL, column_names)

}

#' @rdname dimnames.lgb.Dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {

  # Same type guard as the other lgb.Dataset methods in this file, so a direct
  # call with a foreign object fails with a clear message instead of an opaque
  # `$` error further down
  if (!lgb.is.Dataset(x)) {
    stop("dimnames<-.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # `value` must look like list(<row names>, <column names>)
  if (!is.list(value) || length(value) != 2L) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }

  # Row names are not supported at all
  if (!is.null(value[[1L]])) {
    stop("lgb.Dataset does not have rownames")
  }

  new_colnames <- value[[2L]]

  # NULL column names are handed to set_colnames() unchanged, without a length check
  if (is.null(new_colnames)) {
    x$set_colnames(NULL)
    return(x)
  }

  # One name per column is required
  n_columns <- ncol(x)
  if (n_columns != length(new_colnames)) {
    stop(
      "can't assign ", sQuote(length(new_colnames)),
      " colnames to an lgb.Dataset with ", sQuote(n_columns), " columns"
    )
  }

  # Replacement functions must return the modified object
  x$set_colnames(new_colnames)
  x

}

#' Slice a dataset
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' original lgb.Dataset object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
#' @return constructed sub dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' dsub <- lightgbm::slice(dtrain, 1:42)
#' lgb.Dataset.construct(dsub)
#' labels <- lightgbm::getinfo(dsub, "label")
#'
#' @export
slice <- function(dataset, ...) UseMethod("slice")

#' @rdname slice
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {

  # A genuine lgb.Dataset slices itself; extra arguments are passed along
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$slice(idxset, ...)))
  }

  # Anything else is rejected
  stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

#' Get information of an lgb.Dataset object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#' @return info data
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#'     \item \code{label}: the label lightgbm learns from
#'     \item \code{weight}: used to do a weight rescale
#'     \item \code{group}: group size
#'     \item \code{init_score}: the initial score, i.e. the base prediction lightgbm will boost from
#' }
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
#'
#' @export
getinfo <- function(dataset, ...) UseMethod("getinfo")

#' @rdname getinfo
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {

  # A genuine lgb.Dataset looks the field up itself; `...` is not forwarded
  if (lgb.is.Dataset(dataset)) {
    return(dataset$getinfo(name))
  }

  # Anything else is rejected
  stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

#' Set information of an lgb.Dataset object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the field to set (see details)
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#'     \item \code{label}: the label lightgbm learns from
#'     \item \code{weight}: used to do a weight rescale
#'     \item \code{init_score}: the initial score, i.e. the base prediction lightgbm will boost from
#'     \item \code{group}
#' }
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
#'
#' @export
setinfo <- function(dataset, ...) UseMethod("setinfo")

#' @rdname setinfo
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

  # Reject anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # The object stores the field itself; `...` is not forwarded
  outcome <- dataset$setinfo(name, info)
  invisible(outcome)

}

#' Set categorical feature of \code{lgb.Dataset}
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data_file <- tempfile(fileext = ".data")
#' lgb.Dataset.save(dtrain, data_file)
#' dtrain <- lgb.Dataset(data_file)
#' lgb.Dataset.set.categorical(dtrain, 1L:2L)
#'
#' @rdname lgb.Dataset.set.categorical
#' @export
lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object")
  }

  # The object records its categorical features itself
  invisible(dataset$set_categorical_feature(categorical_feature))

}

#' Set reference of \code{lgb.Dataset}
#'
#' If you want to use validation data, you should set reference to training data
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset(test$data, label = test$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
#'
#' @rdname lgb.Dataset.set.reference
#' @export
lgb.Dataset.set.reference <- function(dataset, reference) {

  # Only `dataset` is validated here; `reference` is handed to the object as is
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object")
  }

  # The object records its reference itself
  invisible(dataset$set_reference(reference))

}

#' Save \code{lgb.Dataset} to a binary file
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname character filename of the output file
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, tempfile(fileext = ".bin"))
#'
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object")
  }

  # Only character file names pass this check; connection objects are not
  # character and are rejected, so the message must not promise them
  if (!is.character(fname)) {
    stop("lgb.Dataset.save: fname should be a character string naming the output file")
  }

  # The object writes itself to the binary file
  invisible(dataset$save_binary(fname))

}