lgb.Dataset.R 29.8 KB
Newer Older
James Lamb's avatar
James Lamb committed
1
2

#' @importFrom methods is
James Lamb's avatar
James Lamb committed
3
4
5
#' @importFrom R6 R6Class
Dataset <- R6::R6Class(

6
  classname = "lgb.Dataset",
7
  cloneable = FALSE,
Guolin Ke's avatar
Guolin Ke committed
8
  public = list(
James Lamb's avatar
James Lamb committed
9

10
    # Finalize will free up the handles
Guolin Ke's avatar
Guolin Ke committed
11
    finalize = function() {

      # Release the native Dataset owned by this object.
      # Does nothing when no handle is currently held.
      if (lgb.is.null.handle(private$handle)) {
        return(invisible(NULL))
      }

      # Free the native object, then forget the handle so that the next
      # construct() call builds a new one instead of reusing a freed one
      lgb.call("LGBM_DatasetFree_R", ret = NULL, private$handle)
      private$handle <- NULL

    },
James Lamb's avatar
James Lamb committed
23

24
    # Initialize will create a starter dataset
Guolin Ke's avatar
Guolin Ke committed
25
    initialize = function(data,
                          params = list(),
                          reference = NULL,
                          colnames = NULL,
                          categorical_feature = NULL,
                          predictor = NULL,
                          free_raw_data = TRUE,
                          used_indices = NULL,
                          info = list(),
                          ...) {

      # Record everything needed to build the Dataset later. No native object
      # is created here; construct() does that on demand.
      #
      # Named values captured by `...` are routed by name: the known metadata
      # fields are merged into `info`, everything else is merged into `params`.
      info_fields <- c("label", "weight", "init_score", "group")
      extra_args <- list(...)

      for (arg_name in names(extra_args)) {
        if (arg_name %in% info_fields) {
          info[[arg_name]] <- extra_args[[arg_name]]
        } else {
          params[[arg_name]] <- extra_args[[arg_name]]
        }
      }

      # A reference, when given, has to be an lgb.Dataset
      if (!is.null(reference) && !lgb.check.r6.class(reference, "lgb.Dataset")) {
        stop("lgb.Dataset: Can only use ", sQuote("lgb.Dataset"), " as reference")
      }

      # A predictor, when given, has to be an lgb.Predictor
      if (!is.null(predictor) && !lgb.check.r6.class(predictor, "lgb.Predictor")) {
        stop("lgb.Dataset: Only can use ", sQuote("lgb.Predictor"), " as predictor")
      }

      # Dense matrices are stored as double; convert other storage modes now
      if (is.matrix(data) && storage.mode(data) != "double") {
        storage.mode(data) <- "double"
      }

      # Keep the inputs; they are consumed by construct()
      private$raw_data <- data
      private$free_raw_data <- free_raw_data
      private$used_indices <- used_indices
      private$reference <- reference
      private$predictor <- predictor
      private$colnames <- colnames
      private$categorical_feature <- categorical_feature
      private$params <- params
      private$info <- info

    },
James Lamb's avatar
James Lamb committed
95

96
97
98
    create_valid = function(data,
                            info = list(),
                            ...) {

      # Create a validation Dataset that uses this Dataset as its reference
      # and inherits its parameters, column names, categorical features,
      # predictor and free_raw_data setting.
      #
      # data: raw data for the new Dataset
      # info: list of metadata for the new Dataset
      # ...:  forwarded to the Dataset initializer
      #
      # Arguments are passed by name. With positional passing, a name inside
      # `...` that matches one of the initializer's formals is bound first
      # and silently shifts every following positional value by one slot.
      ret <- Dataset$new(
        data = data,
        params = private$params,
        reference = self,
        colnames = private$colnames,
        categorical_feature = private$categorical_feature,
        predictor = private$predictor,
        free_raw_data = private$free_raw_data,
        used_indices = NULL,
        info = info,
        ...
      )

      # Return the new Dataset invisibly
      return(invisible(ret))

    },
James Lamb's avatar
James Lamb committed
116

117
    # Dataset constructor
Guolin Ke's avatar
Guolin Ke committed
118
    construct = function() {

      # Build the native LightGBM Dataset for this object and store its handle
      # in private$handle. Calling it again while a handle exists is a no-op.
      # Returns self invisibly. Stops when the raw data type is unsupported,
      # when a subset is requested without a reference, when no handle could
      # be created, or when no label is available after construction.

      # Already constructed: nothing to do
      if (!lgb.is.null.handle(private$handle)) {
        return(invisible(self))
      }

      # In-memory data (dense matrix or dgCMatrix) can supply both the feature
      # names and the number of features
      data_in_memory <- is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")

      # Get feature names
      cnames <- NULL
      if (data_in_memory) {
        cnames <- colnames(private$raw_data)
      }

      # Use them only when the user did not provide column names
      if (is.null(private$colnames) && !is.null(cnames)) {
        private$colnames <- as.character(cnames)
      }

      # Translate categorical features into 0-based column indices
      if (!is.null(private$categorical_feature)) {

        if (is.character(private$categorical_feature)) {

          # Look the names up among the known column names
          cate_indices <- as.list(match(private$categorical_feature, private$colnames) - 1)

          # Names that match no column come back as NA
          if (sum(is.na(cate_indices)) > 0) {
            stop("lgb.self.get.handle: supplied an unknown feature in categorical_feature: ", sQuote(private$categorical_feature[is.na(cate_indices)]))
          }

        } else {

          # Work out how many features there are. The column count of the raw
          # data is authoritative when the data is in memory; otherwise fall
          # back to the column names. When neither is available (for example
          # a data file without column names) the count is unknown here, so
          # the bound check is skipped rather than comparing against zero.
          num_features <- NULL
          if (data_in_memory) {
            num_features <- ncol(private$raw_data)
          } else if (!is.null(private$colnames)) {
            num_features <- length(private$colnames)
          }

          if (!is.null(num_features) && max(private$categorical_feature) > num_features) {
            stop("lgb.self.get.handle: supplied a too large value in categorical_feature: ", max(private$categorical_feature), " but only ", num_features, " features")
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          cate_indices <- as.list(private$categorical_feature - 1)

        }

        # Store indices for categorical features
        private$params$categorical_feature <- cate_indices

      }

      # Generate parameter str
      params_str <- lgb.params2str(private$params)

      # Get handle of reference dataset (this constructs the reference if needed)
      ref_handle <- NULL
      if (!is.null(private$reference)) {
        ref_handle <- private$reference$.__enclos_env__$private$get_handle()
      }
      handle <- NA_real_

      if (is.null(private$used_indices)) {

        # Not subsetting: create the Dataset from the raw data
        if (is.character(private$raw_data)) {

          # Raw data is a file name
          handle <- lgb.call("LGBM_DatasetCreateFromFile_R",
                             ret = handle,
                             lgb.c_str(private$raw_data),
                             params_str,
                             ref_handle)

        } else if (is.matrix(private$raw_data)) {

          # Raw data is a dense matrix
          handle <- lgb.call("LGBM_DatasetCreateFromMat_R",
                             ret = handle,
                             private$raw_data,
                             nrow(private$raw_data),
                             ncol(private$raw_data),
                             params_str,
                             ref_handle)

        } else if (methods::is(private$raw_data, "dgCMatrix")) {

          # The length of the column pointer is passed on as a single number,
          # so it has to fit into a 32-bit signed integer
          if (length(private$raw_data@p) > 2147483647) {
            stop("Cannot support large CSC matrix")
          }

          # Raw data is a dgCMatrix (column-compressed sparse matrix)
          handle <- lgb.call("LGBM_DatasetCreateFromCSC_R",
                             ret = handle,
                             private$raw_data@p,
                             private$raw_data@i,
                             private$raw_data@x,
                             length(private$raw_data@p),
                             length(private$raw_data@x),
                             nrow(private$raw_data),
                             params_str,
                             ref_handle)

        } else {

          # Unknown data type
          stop("lgb.Dataset.construct: does not support constructing from ", sQuote(class(private$raw_data)))

        }

      } else {

        # Subsetting: rows are taken from the reference Dataset
        if (is.null(private$reference)) {
          stop("lgb.Dataset.construct: reference cannot be NULL for constructing data subset")
        }

        # Construct subset
        handle <- lgb.call("LGBM_DatasetGetSubset_R",
                           ret = handle,
                           ref_handle,
                           c(private$used_indices), # Adding c() fixes issue in R v3.5
                           length(private$used_indices),
                           params_str)

      }

      if (lgb.is.null.handle(handle)) {
        stop("lgb.Dataset.construct: cannot create Dataset handle")
      }

      # Setup class and private type
      class(handle) <- "lgb.Dataset.handle"
      private$handle <- handle

      # Push the feature names to the native Dataset
      if (!is.null(private$colnames)) {
        self$set_colnames(private$colnames)
      }

      # Load init score if requested (not done for subsets)
      if (!is.null(private$predictor) && is.null(private$used_indices)) {

        # Raw scores of the predictor on the raw data become the initial scores
        init_score <- private$predictor$predict(private$raw_data, rawscore = TRUE, reshape = TRUE)

        # Flatten to a plain vector; no transpose is applied
        init_score <- as.vector(init_score)
        private$info$init_score <- init_score

      }

      # Drop the raw data when requested; this has to happen after the
      # predictor above has used it
      if (isTRUE(private$free_raw_data)) {
        private$raw_data <- NULL
      }

      # Push every stored info field to the native Dataset
      if (length(private$info) > 0) {

        for (i in seq_along(private$info)) {

          p <- private$info[i]
          self$setinfo(names(p), p[[1]])

        }

      }

      # A label is required
      if (is.null(self$getinfo("label"))) {
        stop("lgb.Dataset.construct: label should be set")
      }

      # Return self
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
297

298
    # Dimension function
Guolin Ke's avatar
Guolin Ke committed
299
    dim = function() {

      # Return c(number of rows, number of features).
      # A constructed Dataset is asked through its handle; before that the
      # dimensions of an in-memory matrix / dgCMatrix are used. For anything
      # else the dimensions are not known yet and an error is raised.

      if (!lgb.is.null.handle(private$handle)) {

        # Output slots handed to the native calls
        n_data <- 0L
        n_feature <- 0L

        n_data <- lgb.call("LGBM_DatasetGetNumData_R", ret = n_data, private$handle)
        n_feature <- lgb.call("LGBM_DatasetGetNumFeature_R", ret = n_feature, private$handle)
        return(c(n_data, n_feature))

      }

      # NOTE: dim() on a dgCMatrix requires the Matrix package
      raw_has_dims <- is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")
      if (raw_has_dims) {
        return(dim(private$raw_data))
      }

      # Trying to work with unknown dimensions is not possible
      stop("dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly")

    },
James Lamb's avatar
James Lamb committed
325

326
    # Get column names
Guolin Ke's avatar
Guolin Ke committed
327
    get_colnames = function() {

      # Return the feature names of the Dataset as a character vector.
      # A constructed Dataset is asked through its handle and the answer is
      # cached in private$colnames; before that the column names of an
      # in-memory matrix / dgCMatrix are returned (possibly NULL). For
      # anything else the names are not known yet and an error is raised.

      if (!lgb.is.null.handle(private$handle)) {

        # The native side returns all names in one tab-separated string
        cnames <- lgb.call.return.str("LGBM_DatasetGetFeatureNames_R", private$handle)
        private$colnames <- as.character(base::strsplit(cnames, "\t")[[1]])
        private$colnames

      } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {

        # Dense matrix or dgCMatrix (column-compressed sparse matrix)
        colnames(private$raw_data)

      } else {

        # Column names are unknown until the Dataset has been constructed
        stop("get_colnames: cannot get column names before dataset has been constructed, please call lgb.Dataset.construct explicitly")

      }

    },
James Lamb's avatar
James Lamb committed
350

351
    # Set column names
Guolin Ke's avatar
Guolin Ke committed
352
    set_colnames = function(colnames) {

      # Set the feature names. NULL or zero-length input leaves everything
      # unchanged. The names are stored on the R side and, when the Dataset
      # has been constructed, also sent to the native Dataset.
      # Returns self invisibly.

      # as.character(NULL) is character(0), so one length test covers both
      # the NULL case and the empty case
      feature_names <- as.character(colnames)
      if (length(feature_names) == 0) {
        return(invisible(self))
      }

      private$colnames <- feature_names

      if (!lgb.is.null.handle(private$handle)) {

        # The native call takes all names as one tab-separated string
        tab_joined <- paste0(feature_names, collapse = "\t")
        lgb.call("LGBM_DatasetSetFeatureNames_R",
                 ret = NULL,
                 private$handle,
                 lgb.c_str(tab_joined))

      }

      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
382

383
    # Get information
Guolin Ke's avatar
Guolin Ke committed
384
    getinfo = function(name) {

      # Return one metadata field of the Dataset.
      # name: one of "label", "weight", "init_score", "group".
      # A value already held on the R side is returned directly; otherwise
      # the field is read from the constructed Dataset and cached. Returns
      # NULL when the field is empty. Stops when `name` is invalid, or when
      # nothing is cached and the Dataset has not been constructed.

      INFONAMES <- c("label", "weight", "init_score", "group")

      name_is_valid <- is.character(name) && length(name) == 1 && name %in% INFONAMES
      if (!name_is_valid) {
        stop("getinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", "))
      }

      # Cached on the R side: no native call needed
      cached <- private$info[[name]]
      if (!is.null(cached)) {
        return(cached)
      }

      if (lgb.is.null.handle(private$handle)) {
        stop("Cannot perform getinfo before constructing Dataset.")
      }

      # Ask for the length of the field first
      info_len <- 0L
      info_len <- lgb.call("LGBM_DatasetGetFieldSize_R",
                           ret = info_len,
                           private$handle,
                           lgb.c_str(name))

      if (info_len > 0) {

        # Allocate the output vector: integer for "group", numeric otherwise
        field_values <- if (name == "group") {
          integer(info_len)
        } else {
          numeric(info_len)
        }

        field_values <- lgb.call("LGBM_DatasetGetField_R",
                                 ret = field_values,
                                 private$handle,
                                 lgb.c_str(name))

        private$info[[name]] <- field_values

      }

      private$info[[name]]

    },
James Lamb's avatar
James Lamb committed
432

433
    # Set information
Guolin Ke's avatar
Guolin Ke committed
434
    setinfo = function(name, info) {

      # Set one metadata field of the Dataset.
      # name: one of "label", "weight", "init_score", "group".
      # info: the values; coerced to integer for "group", numeric otherwise.
      # The value is always stored on the R side; it is also sent to the
      # native Dataset when that exists and the value is non-empty.
      # Returns self invisibly. Stops when `name` is invalid.

      INFONAMES <- c("label", "weight", "init_score", "group")

      name_is_valid <- is.character(name) && length(name) == 1 && name %in% INFONAMES
      if (!name_is_valid) {
        stop("setinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", "))
      }

      # Coerce to the storage type used for this field
      if (name == "group") {
        info <- as.integer(info)
      } else {
        info <- as.numeric(info)
      }

      # Store information privately
      private$info[[name]] <- info

      # Forward to the native Dataset only when there is something to send
      can_forward <- !lgb.is.null.handle(private$handle) && !is.null(info) && length(info) > 0
      if (can_forward) {

        lgb.call("LGBM_DatasetSetField_R",
                 ret = NULL,
                 private$handle,
                 lgb.c_str(name),
                 info,
                 length(info))

      }

      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
473

474
    # Slice dataset
Guolin Ke's avatar
Guolin Ke committed
475
    slice = function(idxset, ...) {

      # Create a new Dataset that is a row subset of this one.
      # idxset: row indices to keep; stored as used_indices of the new
      #         Dataset and applied when that Dataset is constructed.
      # ...:    forwarded to the Dataset initializer.
      # The new Dataset has no raw data of its own and uses this Dataset as
      # its reference.
      #
      # Arguments are passed by name. With positional passing, a name inside
      # `...` that matches one of the initializer's formals is bound first
      # and silently shifts every following positional value by one slot.
      Dataset$new(
        data = NULL,
        params = private$params,
        reference = self,
        colnames = private$colnames,
        categorical_feature = private$categorical_feature,
        predictor = private$predictor,
        free_raw_data = private$free_raw_data,
        used_indices = idxset,
        info = NULL,
        ...
      )

    },
James Lamb's avatar
James Lamb committed
490

491
    # Update parameters
492
    update_params = function(params) {

      # Update Dataset parameters.
      # Before construction the new values are merged into the stored
      # parameter list; after construction they are sent to the native
      # Dataset instead and the stored list is left as it is.
      # Returns self invisibly.

      if (lgb.is.null.handle(private$handle)) {
        private$params <- modifyList(private$params, params)
      } else {
        lgb.call("LGBM_DatasetUpdateParam_R", ret = NULL, private$handle, lgb.params2str(params))
      }

      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
503

504
    # Set categorical feature parameter
505
    set_categorical_feature = function(categorical_feature) {

      # Replace the categorical features of the Dataset.
      # Does nothing when the new value is identical to the current one.
      # Otherwise the native Dataset is released so it gets rebuilt with the
      # new setting; that rebuild needs the raw data, so an error is raised
      # when the raw data is no longer held. Returns self invisibly.

      unchanged <- identical(private$categorical_feature, categorical_feature)
      if (unchanged) {
        return(invisible(self))
      }

      raw_data_missing <- is.null(private$raw_data)
      if (raw_data_missing) {
        stop("set_categorical_feature: cannot set categorical feature after freeing raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
      }

      private$categorical_feature <- categorical_feature

      # Drop the current handle; the next construct() picks up the new value
      self$finalize()
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
526

527
    # Set reference
Guolin Ke's avatar
Guolin Ke committed
528
    set_reference = function(reference) {

      # Set the reference Dataset.
      # reference: an lgb.Dataset, or NULL to clear the reference.
      # A non-NULL reference also passes its categorical features, column
      # names and predictor on to this Dataset. When the reference actually
      # changes, the native Dataset is released so it gets rebuilt against
      # the new reference; that needs the raw data, so an error is raised
      # when the raw data is no longer held. Returns self invisibly.

      # Validate before touching `reference`: the accessors below fail with
      # an unrelated "attempt to apply non-function" error on NULL or on an
      # object of the wrong class
      if (!is.null(reference) && !lgb.check.r6.class(reference, "lgb.Dataset")) {
        stop("set_reference: Can only use lgb.Dataset as a reference")
      }

      # Take over the settings of the reference
      if (!is.null(reference)) {
        self$set_categorical_feature(reference$.__enclos_env__$private$categorical_feature)
        self$set_colnames(reference$get_colnames())
        private$set_predictor(reference$.__enclos_env__$private$predictor)
      }

      # Check for identical references
      if (identical(private$reference, reference)) {
        return(invisible(self))
      }

      # Check for empty data
      if (is.null(private$raw_data)) {

        stop("set_reference: cannot set reference after freeing raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")

      }

      # Store reference
      private$reference <- reference

      # Drop the current handle; the next construct() uses the new reference
      self$finalize()
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
566

567
    # Save the Dataset to a binary file
Guolin Ke's avatar
Guolin Ke committed
568
    save_binary = function(fname) {

      # Write the Dataset to the binary file `fname`, constructing the
      # Dataset first when that has not happened yet.
      # Returns self invisibly.

      # get_handle() constructs the Dataset on demand
      dataset_handle <- private$get_handle()

      lgb.call("LGBM_DatasetSaveBinary_R",
               ret = NULL,
               dataset_handle,
               lgb.c_str(fname))

      return(invisible(self))
    }
James Lamb's avatar
James Lamb committed
578

Guolin Ke's avatar
Guolin Ke committed
579
580
  ),
  private = list(
581
582
583
584
585
    handle = NULL,
    raw_data = NULL,
    params = list(),
    reference = NULL,
    colnames = NULL,
586
    categorical_feature = NULL,
587
588
589
590
    predictor = NULL,
    free_raw_data = TRUE,
    used_indices = NULL,
    info = NULL,
James Lamb's avatar
James Lamb committed
591

592
593
    # Get handle
    get_handle = function() {

      # Return the handle of the native Dataset, building it when needed.
      # construct() returns immediately when a handle already exists, so it
      # can be called unconditionally.
      self$construct()
      private$handle

    },
James Lamb's avatar
James Lamb committed
602

603
    # Set predictor
Guolin Ke's avatar
Guolin Ke committed
604
    set_predictor = function(predictor) {

      # Set the predictor used to compute initial scores.
      # predictor: an lgb.Predictor, or NULL.
      # Does nothing when the new value is identical to the current one.
      # Otherwise the native Dataset is released so it gets rebuilt with the
      # new predictor; that needs the raw data, so an error is raised when
      # the raw data is no longer held. Returns self invisibly.

      unchanged <- identical(private$predictor, predictor)
      if (unchanged) {
        return(invisible(self))
      }

      if (is.null(private$raw_data)) {
        stop("set_predictor: cannot set predictor after free raw data,
          please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
      }

      # A non-NULL predictor has to be an lgb.Predictor
      if (!is.null(predictor) && !lgb.check.r6.class(predictor, "lgb.Predictor")) {
        stop("set_predictor: Can only use lgb.Predictor as predictor")
      }

      private$predictor <- predictor

      # Drop the current handle; the next construct() uses the new predictor
      self$finalize()
      return(invisible(self))

    }
James Lamb's avatar
James Lamb committed
635

Guolin Ke's avatar
Guolin Ke committed
636
637
638
  )
)

wxchan's avatar
wxchan committed
639
#' Construct lgb.Dataset object
Guolin Ke's avatar
Guolin Ke committed
640
#'
wxchan's avatar
wxchan committed
641
#' Construct lgb.Dataset object from dense matrix, sparse matrix
Guolin Ke's avatar
Guolin Ke committed
642
643
644
645
646
647
#' or local file (that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters
#' @param reference reference dataset
#' @param colnames names of columns
648
#' @param categorical_feature categorical features
Guolin Ke's avatar
Guolin Ke committed
649
650
651
#' @param free_raw_data TRUE for need to free raw data after construct
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info} or parameters to pass to \code{params}
James Lamb's avatar
James Lamb committed
652
#'
Guolin Ke's avatar
Guolin Ke committed
653
#' @return constructed dataset
James Lamb's avatar
James Lamb committed
654
#'
Guolin Ke's avatar
Guolin Ke committed
655
#' @examples
656
657
658
659
660
661
662
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
James Lamb's avatar
James Lamb committed
663
#'
Guolin Ke's avatar
Guolin Ke committed
664
665
#' @export
lgb.Dataset <- function(data,
                        params = list(),
                        reference = NULL,
                        colnames = NULL,
                        categorical_feature = NULL,
                        free_raw_data = TRUE,
                        info = list(),
                        ...) {

  # Build the R6 Dataset object and return it invisibly.
  #
  # Every argument is passed by name so it can never be shifted into the wrong
  # slot of the Dataset initializer (with positional forwarding, a name inside
  # `...` that matches one of the initializer's formals silently misaligns all
  # following arguments). `predictor` and `used_indices` are not exposed by
  # this constructor, so they are always NULL here.
  invisible(Dataset$new(
    data = data,
    params = params,
    reference = reference,
    colnames = colnames,
    categorical_feature = categorical_feature,
    predictor = NULL,
    free_raw_data = free_raw_data,
    used_indices = NULL,
    info = info,
    ...
  ))

}

wxchan's avatar
wxchan committed
688
#' Construct validation data
James Lamb's avatar
James Lamb committed
689
#'
wxchan's avatar
wxchan committed
690
#' Construct validation data according to training data
James Lamb's avatar
James Lamb committed
691
#'
Guolin Ke's avatar
Guolin Ke committed
692
693
694
695
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info}.
James Lamb's avatar
James Lamb committed
696
#'
Guolin Ke's avatar
Guolin Ke committed
697
#' @return constructed dataset
James Lamb's avatar
James Lamb committed
698
#'
Guolin Ke's avatar
Guolin Ke committed
699
#' @examples
700
701
702
703
704
705
706
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
James Lamb's avatar
James Lamb committed
707
#'
Guolin Ke's avatar
Guolin Ke committed
708
#' @export
709
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {

  # Guard: the training data must be an lgb.Dataset
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("lgb.Dataset.create.valid: input data should be an lgb.Dataset object")
  }

  # Let the training dataset build the validation set; hide it from auto-print
  valid_set <- dataset$create_valid(data, info, ...)
  invisible(valid_set)

}
Guolin Ke's avatar
Guolin Ke committed
720

721
#' Construct Dataset explicitly
James Lamb's avatar
James Lamb committed
722
#'
Guolin Ke's avatar
Guolin Ke committed
723
#' @param dataset Object of class \code{lgb.Dataset}
James Lamb's avatar
James Lamb committed
724
#'
Guolin Ke's avatar
Guolin Ke committed
725
#' @examples
726
727
728
729
730
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
James Lamb's avatar
James Lamb committed
731
#'
Guolin Ke's avatar
Guolin Ke committed
732
733
#' @export
lgb.Dataset.construct <- function(dataset) {

  # Happy path: delegate to the object's construct() method, invisibly
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$construct()))
  }

  # Anything that is not an lgb.Dataset is rejected
  stop("lgb.Dataset.construct: input data should be an lgb.Dataset object")

}

745
#' Dimensions of an lgb.Dataset
James Lamb's avatar
James Lamb committed
746
#'
Guolin Ke's avatar
Guolin Ke committed
747
748
749
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters
James Lamb's avatar
James Lamb committed
750
#'
Guolin Ke's avatar
Guolin Ke committed
751
#' @return a vector of numbers of rows and of columns
James Lamb's avatar
James Lamb committed
752
#'
Guolin Ke's avatar
Guolin Ke committed
753
754
755
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{lgb.Dataset} object.
James Lamb's avatar
James Lamb committed
756
#'
Guolin Ke's avatar
Guolin Ke committed
757
#' @examples
758
759
760
761
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
James Lamb's avatar
James Lamb committed
762
#'
763
764
765
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
James Lamb's avatar
James Lamb committed
766
#'
Guolin Ke's avatar
Guolin Ke committed
767
768
769
#' @rdname dim
#' @export
dim.lgb.Dataset <- function(x, ...) {

  # Guard: refuse anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(x)
  if (!is_dataset) {
    stop("dim.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # The object's own dim() method supplies the result; `...` is not used
  x$dim()

}

#' Handling of column names of \code{lgb.Dataset}
#'
#' Only column names are supported for \code{lgb.Dataset}, thus setting of
784
#' row names would have no effect and returned row names would be NULL.
Guolin Ke's avatar
Guolin Ke committed
785
786
787
788
789
790
791
792
793
794
#'
#' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one is ignored
#'        and the second one is column names
#'
#' @details
#' Generic \code{dimnames} methods are used by \code{colnames}.
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
795
796
797
798
799
800
801
802
803
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' dimnames(dtrain)
#' colnames(dtrain)
#' colnames(dtrain) <- make.names(1:ncol(train$data))
#' print(dtrain, verbose = TRUE)
James Lamb's avatar
James Lamb committed
804
#'
Guolin Ke's avatar
Guolin Ke committed
805
806
807
#' @rdname dimnames.lgb.Dataset
#' @export
dimnames.lgb.Dataset <- function(x) {

  # Guard: refuse anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(x)
  if (!is_dataset) {
    stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # Row names are never reported, so the first slot is always NULL;
  # the second slot holds whatever get_colnames() returns
  column_names <- x$get_colnames()
  list(NULL, column_names)

}

#' @rdname dimnames.lgb.Dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {

  # `value` must be a two-element list: (row names, column names)
  well_formed <- is.list(value) && length(value) == 2L
  if (!well_formed) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }

  # Row names are not supported, so the first element has to be NULL
  row_names <- value[[1L]]
  if (!is.null(row_names)) {
    stop("lgb.Dataset does not have rownames")
  }

  # A NULL second element clears the column names
  col_names <- value[[2L]]
  if (is.null(col_names)) {
    x$set_colnames(NULL)
    return(x)
  }

  # The number of supplied names must equal the number of columns
  n_names <- length(col_names)
  if (ncol(x) != n_names) {
    stop("can't assign ", sQuote(n_names), " colnames to an lgb.Dataset with ", sQuote(ncol(x)), " columns")
  }

  # Store the new column names and hand the dataset back
  x$set_colnames(col_names)
  x

}

853
#' Slice a dataset
James Lamb's avatar
James Lamb committed
854
#'
855
#' Get a new \code{lgb.Dataset} containing the specified rows of
James Lamb's avatar
James Lamb committed
856
#' original lgb.Dataset object
857
#'
Guolin Ke's avatar
Guolin Ke committed
858
859
860
861
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
#' @return constructed sub dataset
James Lamb's avatar
James Lamb committed
862
#'
Guolin Ke's avatar
Guolin Ke committed
863
#' @examples
864
865
866
867
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
James Lamb's avatar
James Lamb committed
868
#'
869
#' dsub <- lightgbm::slice(dtrain, 1:42)
870
#' lgb.Dataset.construct(dsub)
871
#' labels <- lightgbm::getinfo(dsub, "label")
James Lamb's avatar
James Lamb committed
872
#'
Guolin Ke's avatar
Guolin Ke committed
873
#' @export
874
875
876
slice <- function(dataset, ...) {
  # S3 generic: dispatch on the class of `dataset`; all arguments are handed
  # on to the selected method (slice.lgb.Dataset for lgb.Dataset objects)
  UseMethod("slice")
}
Guolin Ke's avatar
Guolin Ke committed
877
878
879
880

#' @rdname slice
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {

  # Happy path: delegate to the object's slice() method, invisibly
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$slice(idxset, ...)))
  }

  # Anything that is not an lgb.Dataset is rejected
  stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

#' Get information of an lgb.Dataset object
James Lamb's avatar
James Lamb committed
893
#'
Guolin Ke's avatar
Guolin Ke committed
894
895
896
897
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#' @return info data
James Lamb's avatar
James Lamb committed
898
#'
Guolin Ke's avatar
Guolin Ke committed
899
900
#' @details
#' The \code{name} field can be one of the following:
James Lamb's avatar
James Lamb committed
901
#'
Guolin Ke's avatar
Guolin Ke committed
902
903
904
905
906
907
#' \itemize{
#'     \item \code{label}: label lightgbm learns from ;
#'     \item \code{weight}: to do a weight rescale ;
#'     \item \code{group}: group size ;
#'     \item \code{init_score}: initial score is the base prediction lightgbm will boost from.
#' }
James Lamb's avatar
James Lamb committed
908
#'
Guolin Ke's avatar
Guolin Ke committed
909
#' @examples
910
911
912
913
914
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
James Lamb's avatar
James Lamb committed
915
#'
916
917
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
James Lamb's avatar
James Lamb committed
918
#'
919
920
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
James Lamb's avatar
James Lamb committed
921
#'
Guolin Ke's avatar
Guolin Ke committed
922
#' @export
923
924
925
getinfo <- function(dataset, ...) {
  # S3 generic: dispatch on the class of `dataset`; all arguments are handed
  # on to the selected method (getinfo.lgb.Dataset for lgb.Dataset objects)
  UseMethod("getinfo")
}
Guolin Ke's avatar
Guolin Ke committed
926
927
928
929

#' @rdname getinfo
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {

  # Guard: refuse anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # Look the field up through the object's own getinfo(); `...` is not used
  dataset$getinfo(name)

}

#' Set information of an lgb.Dataset object
James Lamb's avatar
James Lamb committed
942
#'
Guolin Ke's avatar
Guolin Ke committed
943
944
945
946
947
#' @param dataset Object of class "lgb.Dataset"
#' @param name the name of the field to set
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
James Lamb's avatar
James Lamb committed
948
#'
Guolin Ke's avatar
Guolin Ke committed
949
950
#' @details
#' The \code{name} field can be one of the following:
James Lamb's avatar
James Lamb committed
951
#'
Guolin Ke's avatar
Guolin Ke committed
952
953
954
955
956
957
#' \itemize{
#'     \item \code{label}: label lightgbm learns from ;
#'     \item \code{weight}: to do a weight rescale ;
#'     \item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
#'     \item \code{group}: group size.
#' }
James Lamb's avatar
James Lamb committed
958
#'
Guolin Ke's avatar
Guolin Ke committed
959
#' @examples
960
961
962
963
964
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
James Lamb's avatar
James Lamb committed
965
#'
966
967
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
James Lamb's avatar
James Lamb committed
968
#'
969
970
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
James Lamb's avatar
James Lamb committed
971
#'
Guolin Ke's avatar
Guolin Ke committed
972
#' @export
973
974
975
setinfo <- function(dataset, ...) {
  # S3 generic: dispatch on the class of `dataset`; all arguments are handed
  # on to the selected method (setinfo.lgb.Dataset for lgb.Dataset objects)
  UseMethod("setinfo")
}
Guolin Ke's avatar
Guolin Ke committed
976
977
978
979

#' @rdname setinfo
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

  # Happy path: store the field through the object's setinfo(), invisibly;
  # `...` is not used
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$setinfo(name, info)))
  }

  # Anything that is not an lgb.Dataset is rejected
  stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

990
#' Set categorical feature of \code{lgb.Dataset}
James Lamb's avatar
James Lamb committed
991
#'
992
993
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
James Lamb's avatar
James Lamb committed
994
#'
995
#' @return passed dataset
James Lamb's avatar
James Lamb committed
996
#'
997
#' @examples
998
999
1000
1001
1002
1003
1004
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.set.categorical(dtrain, 1:2)
James Lamb's avatar
James Lamb committed
1005
#'
1006
1007
1008
#' @rdname lgb.Dataset.set.categorical
#' @export
lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {

  # Guard: refuse anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object")
  }

  # Hand the categorical features to the object; hide the result from auto-print
  updated <- dataset$set_categorical_feature(categorical_feature)
  invisible(updated)

}

1020
#' Set reference of \code{lgb.Dataset}
James Lamb's avatar
James Lamb committed
1021
#'
1022
#' If you want to use validation data, you should set reference to training data
James Lamb's avatar
James Lamb committed
1023
#'
Guolin Ke's avatar
Guolin Ke committed
1024
1025
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
James Lamb's avatar
James Lamb committed
1026
#'
Guolin Ke's avatar
Guolin Ke committed
1027
#' @return passed dataset
James Lamb's avatar
James Lamb committed
1028
#'
Guolin Ke's avatar
Guolin Ke committed
1029
#' @examples
1030
1031
1032
1033
1034
1035
1036
1037
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset(test$data, label = test$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
James Lamb's avatar
James Lamb committed
1038
#'
Guolin Ke's avatar
Guolin Ke committed
1039
1040
1041
#' @rdname lgb.Dataset.set.reference
#' @export
lgb.Dataset.set.reference <- function(dataset, reference) {

  # Happy path: register the reference through the object's method, invisibly
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$set_reference(reference)))
  }

  # Anything that is not an lgb.Dataset is rejected
  stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object")

}

1052
#' Save \code{lgb.Dataset} to a binary file
James Lamb's avatar
James Lamb committed
1053
#'
Guolin Ke's avatar
Guolin Ke committed
1054
1055
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname filename of the output file (a character string)
James Lamb's avatar
James Lamb committed
1056
#'
Guolin Ke's avatar
Guolin Ke committed
1057
#' @return passed dataset
James Lamb's avatar
James Lamb committed
1058
#'
Guolin Ke's avatar
Guolin Ke committed
1059
#' @examples
James Lamb's avatar
James Lamb committed
1060
#'
1061
1062
1063
1064
1065
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
James Lamb's avatar
James Lamb committed
1066
#'
Guolin Ke's avatar
Guolin Ke committed
1067
1068
1069
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {

  # Guard: only lgb.Dataset objects can be saved
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object")
  }

  # Guard: only character file names are accepted (anything else, including a
  # connection object, is rejected by this check)
  if (!is.character(fname)) {
    stop("lgb.Dataset.save: fname should be a character string naming the output file")
  }

  # Delegate to the object's save_binary() method; return its result invisibly
  invisible(dataset$save_binary(fname))

}