lgb.Dataset.R 30.1 KB
Newer Older
James Lamb's avatar
James Lamb committed
1
#' @importFrom methods is
James Lamb's avatar
James Lamb committed
2
3
4
#' @importFrom R6 R6Class
Dataset <- R6::R6Class(

5
  classname = "lgb.Dataset",
6
  cloneable = FALSE,
Guolin Ke's avatar
Guolin Ke committed
7
  public = list(
James Lamb's avatar
James Lamb committed
8

9
    # Finalize will free up the handles
Guolin Ke's avatar
Guolin Ke committed
10
    finalize = function() {

      # Release the native dataset held by this object, if there is one.
      # Nothing is freed when no handle is stored.
      if (lgb.is.null.handle(private$handle)) {
        return(invisible(NULL))
      }

      # Free the native dataset, then drop the stored handle so it cannot be
      # freed a second time
      lgb.call("LGBM_DatasetFree_R", ret = NULL, private$handle)
      private$handle <- NULL

    },
James Lamb's avatar
James Lamb committed
22

23
    # Initialize will create a starter dataset
Guolin Ke's avatar
Guolin Ke committed
24
    initialize = function(data,
                          params = list(),
                          reference = NULL,
                          colnames = NULL,
                          categorical_feature = NULL,
                          predictor = NULL,
                          free_raw_data = TRUE,
                          used_indices = NULL,
                          info = list(),
                          ...) {

      # Record everything needed to build the dataset later; no native
      # dataset is created here (see construct()).
      #
      # Arguments passed through `...` are routed by name: the known info
      # fields go into `info`, every other name is treated as a parameter.
      info_fields <- c("label", "weight", "init_score", "group")
      extra_args <- list(...)
      for (arg_name in names(extra_args)) {
        arg_value <- extra_args[[arg_name]]
        if (arg_name %in% info_fields) {
          info[[arg_name]] <- arg_value
        } else {
          params[[arg_name]] <- arg_value
        }
      }

      # A reference, when given, must be an lgb.Dataset
      if (!is.null(reference) && !lgb.check.r6.class(reference, "lgb.Dataset")) {
        stop("lgb.Dataset: Can only use ", sQuote("lgb.Dataset"), " as reference")
      }

      # A predictor, when given, must be an lgb.Predictor
      if (!is.null(predictor) && !lgb.check.r6.class(predictor, "lgb.Predictor")) {
        stop("lgb.Dataset: Only can use ", sQuote("lgb.Predictor"), " as predictor")
      }

      # Dense matrices are stored as doubles (e.g. integer or logical
      # matrices are converted)
      if (is.matrix(data) && storage.mode(data) != "double") {
        storage.mode(data) <- "double"
      }

      # Keep the inputs as private state
      private$raw_data <- data
      private$params <- params
      private$info <- info
      private$reference <- reference
      private$predictor <- predictor
      private$colnames <- colnames
      private$categorical_feature <- categorical_feature
      private$free_raw_data <- free_raw_data
      private$used_indices <- sort(used_indices, decreasing = FALSE)

    },
James Lamb's avatar
James Lamb committed
94

95
96
97
    create_valid = function(data,
                            info = list(),
                            ...) {

      # Build a new Dataset around `data` that uses this dataset as its
      # reference and inherits its params, column names, categorical
      # features, predictor and free_raw_data setting.
      # `info` and `...` are forwarded to the Dataset constructor.
      # The new Dataset is returned invisibly.
      valid_set <- Dataset$new(
        data = data
        , params = private$params
        , reference = self
        , colnames = private$colnames
        , categorical_feature = private$categorical_feature
        , predictor = private$predictor
        , free_raw_data = private$free_raw_data
        , used_indices = NULL
        , info = info
        , ...
      )

      invisible(valid_set)

    },
James Lamb's avatar
James Lamb committed
117

118
    # Dataset constructor
Guolin Ke's avatar
Guolin Ke committed
119
    construct = function() {

      # Build the native dataset from the stored raw data (file path, dense
      # matrix or dgCMatrix), or as a row subset of the reference dataset
      # when used_indices is set; then push column names and info fields to
      # it. Returns self invisibly.
      # Stops on: unsupported raw data, unknown / out-of-range
      # categorical_feature, a subset without reference, a handle that could
      # not be created, or a missing label.

      # Already constructed: keep the existing handle
      if (!lgb.is.null.handle(private$handle)) {
        return(invisible(self))
      }

      # In-memory inputs know their own shape and column names
      raw_in_memory <- is.matrix(private$raw_data) ||
        methods::is(private$raw_data, "dgCMatrix")

      # Column names given at creation win; otherwise use those of the data
      if (is.null(private$colnames) && raw_in_memory) {
        data_names <- colnames(private$raw_data)
        if (!is.null(data_names)) {
          private$colnames <- as.character(data_names)
        }
      }

      # Translate categorical features into 0-based column indices
      if (!is.null(private$categorical_feature)) {

        if (is.character(private$categorical_feature)) {

          # Features given by name: look them up in the column names
          matched <- match(private$categorical_feature, private$colnames)
          unknown_features <- private$categorical_feature[is.na(matched)]
          if (length(unknown_features) > 0) {
            stop(
              "lgb.self.get.handle: supplied an unknown feature in categorical_feature: "
              , paste0(sQuote(unknown_features), collapse = ", ")
            )
          }
          cate_indices <- as.list(matched - 1)

        } else {

          # Features given by 1-based position. The number of features comes
          # from the data itself when it is in memory, so that data without
          # column names is handled; otherwise from the column names. When
          # neither is available the bound cannot be checked here.
          n_features <- if (raw_in_memory) {
            ncol(private$raw_data)
          } else if (!is.null(private$colnames)) {
            length(private$colnames)
          } else {
            NA_integer_
          }

          largest_index <- max(private$categorical_feature)
          if (!is.na(n_features) && largest_index > n_features) {
            stop(
              "lgb.self.get.handle: supplied a too large value in categorical_feature: "
              , largest_index
              , " but only "
              , n_features
              , " features"
            )
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          cate_indices <- as.list(private$categorical_feature - 1)

        }

        # Store indices for categorical features
        private$params$categorical_feature <- cate_indices

      }

      # Generate parameter str
      params_str <- lgb.params2str(private$params)

      # Get handle of reference dataset (get_handle() constructs it if needed)
      ref_handle <- NULL
      if (!is.null(private$reference)) {
        ref_handle <- private$reference$.__enclos_env__$private$get_handle()
      }
      handle <- NA_real_

      if (is.null(private$used_indices)) {

        # Not subsetting: create the dataset from the raw data
        if (is.character(private$raw_data)) {

          # Raw data is a file name
          handle <- lgb.call(
            "LGBM_DatasetCreateFromFile_R"
            , ret = handle
            , lgb.c_str(private$raw_data)
            , params_str
            , ref_handle
          )

        } else if (is.matrix(private$raw_data)) {

          # Raw data is a dense matrix
          handle <- lgb.call(
            "LGBM_DatasetCreateFromMat_R"
            , ret = handle
            , private$raw_data
            , nrow(private$raw_data)
            , ncol(private$raw_data)
            , params_str
            , ref_handle
          )

        } else if (methods::is(private$raw_data, "dgCMatrix")) {

          # Raw data is a dgCMatrix (sparse, column compressed)
          if (length(private$raw_data@p) > 2147483647) {
            stop("Cannot support large CSC matrix")
          }
          handle <- lgb.call(
            "LGBM_DatasetCreateFromCSC_R"
            , ret = handle
            , private$raw_data@p
            , private$raw_data@i
            , private$raw_data@x
            , length(private$raw_data@p)
            , length(private$raw_data@x)
            , nrow(private$raw_data)
            , params_str
            , ref_handle
          )

        } else {

          # Unknown data type; an object may carry several classes
          stop(
            "lgb.Dataset.construct: does not support constructing from "
            , paste0(sQuote(class(private$raw_data)), collapse = ", ")
          )

        }

      } else {

        # Subsetting: rows are taken from the reference dataset
        if (is.null(private$reference)) {
          stop("lgb.Dataset.construct: reference cannot be NULL for constructing data subset")
        }

        handle <- lgb.call(
          "LGBM_DatasetGetSubset_R"
          , ret = handle
          , ref_handle
          , c(private$used_indices) # Adding c() fixes issue in R v3.5
          , length(private$used_indices)
          , params_str
        )

      }
      if (lgb.is.null.handle(handle)) {
        stop("lgb.Dataset.construct: cannot create Dataset handle")
      }

      # Setup class and private type
      class(handle) <- "lgb.Dataset.handle"
      private$handle <- handle

      # Set feature names on the native dataset
      if (!is.null(private$colnames)) {
        self$set_colnames(private$colnames)
      }

      # Initial scores come from the predictor (only when not subsetting)
      if (!is.null(private$predictor) && is.null(private$used_indices)) {

        init_score <- private$predictor$predict(
          private$raw_data
          , rawscore = TRUE
          , reshape = TRUE
        )

        # Flatten to a plain vector; as.vector() reads a matrix column by
        # column, so no transpose is applied
        init_score <- as.vector(init_score)
        private$info$init_score <- init_score

      }

      # Should we free raw data?
      if (isTRUE(private$free_raw_data)) {
        private$raw_data <- NULL
      }

      # Push every stored info field to the native dataset
      if (length(private$info) > 0) {

        for (i in seq_along(private$info)) {

          p <- private$info[i]
          self$setinfo(names(p), p[[1]])

        }

      }

      # A label is mandatory
      if (is.null(self$getinfo("label"))) {
        stop("lgb.Dataset.construct: label should be set")
      }

      # Return self
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
324

325
    # Dimension function
Guolin Ke's avatar
Guolin Ke committed
326
    dim = function() {

      # Return c(number of rows, number of features).
      # A constructed dataset is asked through its native handle; before
      # construction the shape is read from an in-memory matrix / dgCMatrix.
      # Stops when neither source is available.

      if (!lgb.is.null.handle(private$handle)) {

        # Integer placeholders handed to lgb.call() as `ret`
        n_rows <- 0L
        n_cols <- 0L

        n_rows <- lgb.call("LGBM_DatasetGetNumData_R", ret = n_rows, private$handle)
        n_cols <- lgb.call("LGBM_DatasetGetNumFeature_R", ret = n_cols, private$handle)
        return(c(n_rows, n_cols))

      }

      # NOTE: dgCMatrix requires the Matrix package
      if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
        return(dim(private$raw_data))
      }

      # Trying to work with unknown dimensions is not possible
      stop(
        "dim: cannot get dimensions before dataset has been constructed, "
        , "please call lgb.Dataset.construct explicitly"
      )

    },
James Lamb's avatar
James Lamb committed
355

356
    # Get column names
Guolin Ke's avatar
Guolin Ke committed
357
    get_colnames = function() {

      # Return the column (feature) names as a character vector.
      # A constructed dataset is asked through its native handle and the
      # answer is also stored in private$colnames; before construction the
      # names are read from an in-memory matrix / dgCMatrix (NULL when the
      # data has no column names). Stops when neither source is available.

      if (!lgb.is.null.handle(private$handle)) {

        # The native side returns all names in one tab-separated string
        cnames <- lgb.call.return.str("LGBM_DatasetGetFeatureNames_R", private$handle)
        private$colnames <- as.character(base::strsplit(cnames, "\t")[[1]])
        private$colnames

      } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {

        # Dense matrix or dgCMatrix (sparse, column compressed)
        colnames(private$raw_data)

      } else {

        # Column names are unknown until the dataset has been constructed
        stop(
          "get_colnames: cannot get column names before dataset has been constructed, please call "
          , "lgb.Dataset.construct explicitly"
        )

      }

    },
James Lamb's avatar
James Lamb committed
383

384
    # Set column names
Guolin Ke's avatar
Guolin Ke committed
385
    set_colnames = function(colnames) {

      # Store new column names and, when a native handle exists, push them
      # to the native dataset. NULL or empty input leaves everything
      # unchanged. Returns self invisibly.

      # as.character(NULL) is character(0), so one length check covers both
      # the NULL and the empty case
      colnames <- as.character(colnames)
      if (length(colnames) == 0) {
        return(invisible(self))
      }

      private$colnames <- colnames

      if (!lgb.is.null.handle(private$handle)) {

        # The native side receives all names as one tab-separated string
        tab_separated <- paste(private$colnames, collapse = "\t")
        lgb.call(
          "LGBM_DatasetSetFeatureNames_R"
          , ret = NULL
          , private$handle
          , lgb.c_str(tab_separated)
        )

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
417

418
    # Get information
Guolin Ke's avatar
Guolin Ke committed
419
    getinfo = function(name) {

      # Return one info field ("label", "weight", "init_score" or "group").
      # A value already stored in private$info is returned as is; otherwise
      # it is fetched through the native handle and stored for later calls.
      # Returns NULL when the native dataset reports the field as empty.
      # Stops on an unknown name, or when the value is not stored and the
      # dataset has not been constructed.

      known_fields <- c("label", "weight", "init_score", "group")
      if (!is.character(name) || length(name) != 1 || !name %in% known_fields) {
        stop("getinfo: name must one of the following: ", paste0(sQuote(known_fields), collapse = ", "))
      }

      # Stored value available: no native call needed
      if (!is.null(private$info[[name]])) {
        return(private$info[[name]])
      }

      if (lgb.is.null.handle(private$handle)){
        stop("Cannot perform getinfo before constructing Dataset.")
      }

      # Ask for the field length first
      field_size <- 0L
      field_size <- lgb.call(
        "LGBM_DatasetGetFieldSize_R"
        , ret = field_size
        , private$handle
        , lgb.c_str(name)
      )

      if (field_size > 0) {

        # "group" is an integer field, all others are numeric
        buffer <- if (name == "group") {
          integer(field_size)
        } else {
          numeric(field_size)
        }

        buffer <- lgb.call(
          "LGBM_DatasetGetField_R"
          , ret = buffer
          , private$handle
          , lgb.c_str(name)
        )
        private$info[[name]] <- buffer

      }

      private$info[[name]]

    },
James Lamb's avatar
James Lamb committed
471

472
    # Set information
Guolin Ke's avatar
Guolin Ke committed
473
    setinfo = function(name, info) {

      # Store one info field ("label", "weight", "init_score" or "group")
      # and, when a native handle exists and the value is non-empty, push it
      # to the native dataset. Returns self invisibly.
      # Stops on an unknown name.

      known_fields <- c("label", "weight", "init_score", "group")
      if (!is.character(name) || length(name) != 1 || !name %in% known_fields) {
        stop("setinfo: name must one of the following: ", paste0(sQuote(known_fields), collapse = ", "))
      }

      # "group" is stored as integer, every other field as numeric
      coerce <- if (name == "group") as.integer else as.numeric
      info <- coerce(info)

      # Store information privately
      private$info[[name]] <- info

      # After coercion `info` is never NULL, so only the length matters
      has_handle <- !lgb.is.null.handle(private$handle)
      if (has_handle && length(info) > 0) {

        lgb.call(
          "LGBM_DatasetSetField_R"
          , ret = NULL
          , private$handle
          , lgb.c_str(name)
          , info
          , length(info)
        )

      }

      invisible(self)

    },
James Lamb's avatar
James Lamb committed
514

515
    # Slice dataset
Guolin Ke's avatar
Guolin Ke committed
516
    slice = function(idxset, ...) {

      # Return a new Dataset describing the rows `idxset` of this dataset.
      # The new object holds no raw data and no info of its own; it uses
      # this dataset as reference and inherits its params, column names,
      # categorical features, predictor and free_raw_data setting.
      # `...` is forwarded to the Dataset constructor.
      row_indices <- sort(idxset, decreasing = FALSE)

      Dataset$new(
        data = NULL
        , params = private$params
        , reference = self
        , colnames = private$colnames
        , categorical_feature = private$categorical_feature
        , predictor = private$predictor
        , free_raw_data = private$free_raw_data
        , used_indices = row_indices
        , info = NULL
        , ...
      )

    },
James Lamb's avatar
James Lamb committed
533

534
    # Update parameters
535
    update_params = function(params) {

      # Merge `params` into this dataset's parameters. Returns self invisibly.
      #
      # On a constructed dataset the new values are first sent to the native
      # dataset; an error raised there leaves the stored parameters
      # untouched. The stored parameters are then updated as well, because
      # create_valid(), slice() and any later re-construction read
      # private$params.
      if (!lgb.is.null.handle(private$handle)) {
        lgb.call(
          "LGBM_DatasetUpdateParam_R"
          , ret = NULL
          , private$handle
          , lgb.params2str(params)
        )

        # Only a list can be merged into the stored parameters
        if (!is.list(params)) {
          return(invisible(self))
        }
      }

      private$params <- modifyList(private$params, params)
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
551

552
    # Set categorical feature parameter
553
    set_categorical_feature = function(categorical_feature) {

      # Replace the categorical feature specification. Returns self invisibly.
      # A change frees the native dataset (finalize()), which is why the raw
      # data must still be available. Stops when the raw data has already
      # been freed.

      # Nothing to do when the specification is unchanged
      if (identical(private$categorical_feature, categorical_feature)) {
        return(invisible(self))
      }

      # Check for empty data
      if (is.null(private$raw_data)) {
        stop(
          "set_categorical_feature: cannot set categorical feature after freeing raw data, "
          , "please set "
          , sQuote("free_raw_data = FALSE")
          , " when you construct lgb.Dataset"
        )
      }

      # Overwrite categorical features
      private$categorical_feature <- categorical_feature

      # Free the native dataset and return self
      self$finalize()
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
574

575
    # Set reference
Guolin Ke's avatar
Guolin Ke committed
576
    set_reference = function(reference) {

      # Use `reference` (an lgb.Dataset, or NULL to clear it) as this
      # dataset's reference. Returns self invisibly.
      # A non-NULL reference also hands over its categorical features,
      # column names and predictor. A change of reference frees the native
      # dataset (finalize()), which is why the raw data must still be
      # available.
      # Stops when `reference` is not an lgb.Dataset, or when the reference
      # changes after the raw data has been freed.

      if (!is.null(reference)) {

        # Validate before touching the reference's members or changing any
        # state of this dataset
        if (!lgb.check.r6.class(reference, "lgb.Dataset")) {
          stop("set_reference: Can only use lgb.Dataset as a reference")
        }

        # Take over the settings of the reference
        self$set_categorical_feature(reference$.__enclos_env__$private$categorical_feature)
        self$set_colnames(reference$get_colnames())
        private$set_predictor(reference$.__enclos_env__$private$predictor)

      }

      # Check for identical references
      if (identical(private$reference, reference)) {
        return(invisible(self))
      }

      # Check for empty data
      if (is.null(private$raw_data)) {
        stop(
          "set_reference: cannot set reference after freeing raw data, "
          , "please set "
          , sQuote("free_raw_data = FALSE")
          , " when you construct lgb.Dataset"
        )
      }

      # Store reference
      private$reference <- reference

      # Free the native dataset and return self
      self$finalize()
      return(invisible(self))

    },
James Lamb's avatar
James Lamb committed
614

615
    # Save binary model
Guolin Ke's avatar
Guolin Ke committed
616
    save_binary = function(fname) {

      # Write the dataset to the binary file `fname`, constructing the
      # dataset first when needed. Returns self invisibly.

      # get_handle() constructs the dataset when no handle exists yet
      dataset_handle <- private$get_handle()

      lgb.call(
        "LGBM_DatasetSaveBinary_R"
        , ret = NULL
        , dataset_handle
        , lgb.c_str(fname)
      )

      invisible(self)
    }
James Lamb's avatar
James Lamb committed
628

Guolin Ke's avatar
Guolin Ke committed
629
630
  ),
  private = list(
631
632
633
634
635
    handle = NULL,
    raw_data = NULL,
    params = list(),
    reference = NULL,
    colnames = NULL,
636
    categorical_feature = NULL,
637
638
639
640
    predictor = NULL,
    free_raw_data = TRUE,
    used_indices = NULL,
    info = NULL,
James Lamb's avatar
James Lamb committed
641

642
643
    # Get handle
    get_handle = function() {

      # Return the native dataset handle, constructing the dataset first
      # when no handle exists yet
      handle_missing <- lgb.is.null.handle(private$handle)
      if (handle_missing) {
        self$construct()
      }

      private$handle

    },
James Lamb's avatar
James Lamb committed
652

653
    # Set predictor
Guolin Ke's avatar
Guolin Ke committed
654
    set_predictor = function(predictor) {

      # Replace the predictor (an lgb.Predictor, or NULL). Returns self
      # invisibly. A change frees the native dataset (finalize()), which is
      # why the raw data must still be available.
      # Stops when the raw data has already been freed, or when `predictor`
      # is neither NULL nor an lgb.Predictor.

      # Nothing to do when the predictor is unchanged
      if (identical(private$predictor, predictor)) {
        return(invisible(self))
      }

      # Check for empty data
      if (is.null(private$raw_data)) {
        stop(
          "set_predictor: cannot set predictor after freeing raw data, "
          , "please set "
          , sQuote("free_raw_data = FALSE")
          , " when you construct lgb.Dataset"
        )
      }

      # A predictor, when given, must be an lgb.Predictor
      if (!is.null(predictor)) {

        if (!lgb.check.r6.class(predictor, "lgb.Predictor")) {
          stop("set_predictor: Can only use lgb.Predictor as predictor")
        }

      }

      # Store predictor
      private$predictor <- predictor

      # Free the native dataset and return self
      self$finalize()
      return(invisible(self))

    }
James Lamb's avatar
James Lamb committed
685

Guolin Ke's avatar
Guolin Ke committed
686
687
688
  )
)

Nikita Titov's avatar
Nikita Titov committed
689
#' Construct an \code{lgb.Dataset} object
#'
#' Build an \code{lgb.Dataset} object from a dense matrix, a sparse matrix,
#' or a local file (one that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters
#' @param reference reference dataset
#' @param colnames names of columns
#' @param categorical_feature categorical features
#' @param free_raw_data \code{TRUE} if the raw data should be freed after the dataset is constructed
#' @param info a list of information of the \code{lgb.Dataset} object
#' @param ... other information to pass to \code{info}, or parameters to pass to \code{params}
#'
#' @return constructed dataset (returned invisibly)
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
#'
#' @export
lgb.Dataset <- function(data,
                        params = list(),
                        reference = NULL,
                        colnames = NULL,
                        categorical_feature = NULL,
                        free_raw_data = TRUE,
                        info = list(),
                        ...) {

  # This user-facing constructor exposes neither a predictor nor a row subset,
  # so both are handed to the R6 generator as NULL.
  new_dataset <- Dataset$new(
    data = data
    , params = params
    , reference = reference
    , colnames = colnames
    , categorical_feature = categorical_feature
    , predictor = NULL
    , free_raw_data = free_raw_data
    , used_indices = NULL
    , info = info
    , ...
  )

  # Hand the object back without auto-printing it
  return(invisible(new_dataset))

}

wxchan's avatar
wxchan committed
740
#' Construct validation data
#'
#' Construct validation data according to training data
#'
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the \code{lgb.Dataset} object
#' @param ... other information to pass to \code{info}.
#'
#' @return constructed dataset (returned invisibly)
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#'
#' @export
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {

  # Valid input: delegate to the training dataset's own create_valid method
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$create_valid(data, info, ...)))
  }

  # Anything else cannot serve as the training dataset
  stop("lgb.Dataset.create.valid: input data should be an lgb.Dataset object")

}
Guolin Ke's avatar
Guolin Ke committed
772

773
#' Construct Dataset explicitly
#'
#' @param dataset Object of class \code{lgb.Dataset}
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' @export
lgb.Dataset.construct <- function(dataset) {

  # Only lgb.Dataset objects expose the construct method used below
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("lgb.Dataset.construct: input data should be an lgb.Dataset object")
  }

  # Trigger construction; the result is returned invisibly
  constructed <- dataset$construct()
  invisible(constructed)

}

Nikita Titov's avatar
Nikita Titov committed
797
#' Dimensions of an \code{lgb.Dataset}
#'
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#'
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters
#'
#' @return a vector of numbers of rows and of columns
#'
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{lgb.Dataset} object.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#'
#' @rdname dim
#' @export
dim.lgb.Dataset <- function(x, ...) {

  # Valid input: the object itself reports its dimensions
  if (lgb.is.Dataset(x)) {
    return(x$dim())
  }

  # Reached only when the method is called directly on something else
  stop("dim.lgb.Dataset: input data should be an lgb.Dataset object")

}

#' Handling of column names of \code{lgb.Dataset}
#'
#' Only column names are supported for \code{lgb.Dataset}: the returned row
#' names are always \code{NULL}, and trying to set row names raises an error.
#'
#' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one must be \code{NULL}
#'              (row names are not supported) and the second one is column names
#'
#' @details
#' Generic \code{dimnames} methods are used by \code{colnames}.
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' dimnames(dtrain)
#' colnames(dtrain)
#' colnames(dtrain) <- make.names(seq_len(ncol(train$data)))
#' print(dtrain, verbose = TRUE)
#'
#' @rdname dimnames.lgb.Dataset
#' @export
dimnames.lgb.Dataset <- function(x) {

  # Reject anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(x)
  if (!is_dataset) {
    stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # First slot (row names) is always NULL; second slot holds the column names
  column_names <- x$get_colnames()
  list(NULL, column_names)

}

#' @rdname dimnames.lgb.Dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {

  # value has to look like list(<row names>, <column names>)
  well_formed <- is.list(value) && length(value) == 2L
  if (!well_formed) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }

  # Row names cannot be stored, so any non-NULL first element is an error
  if (!is.null(value[[1L]])) {
    stop("lgb.Dataset does not have rownames")
  }

  new_colnames <- value[[2L]]

  # No column names supplied: forward NULL to the dataset and stop here
  if (is.null(new_colnames)) {
    x$set_colnames(NULL)
    return(x)
  }

  # The number of names must match the number of columns
  n_columns <- ncol(x)
  n_given <- length(new_colnames)
  if (n_columns != n_given) {
    stop(
      "can't assign "
      , sQuote(n_given)
      , " colnames to an lgb.Dataset with "
      , sQuote(n_columns)
      , " columns"
    )
  }

  # Store the names; a replacement function must return the modified object
  x$set_colnames(new_colnames)
  x

}

911
#' Slice a dataset
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' the original \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
#' @return constructed sub dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' dsub <- lightgbm::slice(dtrain, 1:42)
#' lgb.Dataset.construct(dsub)
#' labels <- lightgbm::getinfo(dsub, "label")
#'
#' @export
slice <- function(dataset, ...) {
  # S3 generic: hand the call to the "slice" method that matches the class of `dataset`
  UseMethod("slice")
}
Guolin Ke's avatar
Guolin Ke committed
935
936
937
938

#' @rdname slice
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {

  # Valid input: let the dataset build the subset, forwarding any extra arguments
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$slice(idxset, ...)))
  }

  # Reached only when the method is called directly on something else
  stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

Nikita Titov's avatar
Nikita Titov committed
950
#' Get information of an \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#' @return info data
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#'     \item \code{label}: the label lightgbm learns from;
#'     \item \code{weight}: to do a weight rescale;
#'     \item \code{group}: group size;
#'     \item \code{init_score}: initial score is the base prediction lightgbm will boost from.
#' }
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
#'
#' @export
getinfo <- function(dataset, ...) {
  # S3 generic: hand the call to the "getinfo" method that matches the class of `dataset`
  UseMethod("getinfo")
}
Guolin Ke's avatar
Guolin Ke committed
984
985
986
987

#' @rdname getinfo
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {

  # Valid input: ask the dataset for the requested field
  # (arguments in `...` are accepted but not forwarded)
  if (lgb.is.Dataset(dataset)) {
    return(dataset$getinfo(name))
  }

  # Reached only when the method is called directly on something else
  stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")

}

Nikita Titov's avatar
Nikita Titov committed
999
#' Set information of an \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the field to set
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#'     \item \code{label}: the label lightgbm learns from;
#'     \item \code{weight}: to do a weight rescale;
#'     \item \code{init_score}: initial score is the base prediction lightgbm will boost from;
#'     \item \code{group}: group size.
#' }
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
#'
#' @export
setinfo <- function(dataset, ...) {
  # S3 generic: hand the call to the "setinfo" method that matches the class of `dataset`
  UseMethod("setinfo")
}
Guolin Ke's avatar
Guolin Ke committed
1034
1035
1036
1037

#' @rdname setinfo
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

  # Reject anything that is not an lgb.Dataset
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # Store the field on the dataset; the method's result is returned invisibly
  # (arguments in `...` are accepted but not forwarded)
  updated <- dataset$setinfo(name, info)
  invisible(updated)

}

1048
#' Set categorical feature of \code{lgb.Dataset}
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.set.categorical(dtrain, 1:2)
#'
#' @rdname lgb.Dataset.set.categorical
#' @export
lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {

  # Valid input: forward the categorical features to the dataset itself
  if (lgb.is.Dataset(dataset)) {
    return(invisible(dataset$set_categorical_feature(categorical_feature)))
  }

  # Anything else has no categorical features to set
  stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object")

}

1078
#' Set reference of \code{lgb.Dataset}
#'
#' If you want to use validation data, you should set reference to training data
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset(test$data, label = test$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
#'
#' @rdname lgb.Dataset.set.reference
#' @export
lgb.Dataset.set.reference <- function(dataset, reference) {

  # Reject anything that is not an lgb.Dataset; `reference` is not checked
  # here and is passed through to the dataset's own method as-is
  is_dataset <- lgb.is.Dataset(dataset)
  if (!is_dataset) {
    stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object")
  }

  # Attach the reference; the method's result is returned invisibly
  linked <- dataset$set_reference(reference)
  invisible(linked)

}

1110
#' Save \code{lgb.Dataset} to a binary file
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname filename of the output file (a character)
#'
#' @return passed dataset
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
#'
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object")
  }

  # Only a character filename is accepted; connections fail this check,
  # so the error message no longer claims they are supported
  if (!is.character(fname)) {
    stop("lgb.Dataset.save: fname should be a character")
  }

  # Write the binary file; the method's result is returned invisibly
  invisible(dataset$save_binary(fname))

}