lgb.Dataset.create.valid.Rd 2.77 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.create.valid}
\alias{lgb.Dataset.create.valid}
\title{Construct validation data}
\usage{
7
8
9
10
11
12
13
lgb.Dataset.create.valid(
  dataset,
  data,
  label = NULL,
  weight = NULL,
  group = NULL,
  init_score = NULL,
14
  params = list()
15
)
Guolin Ke's avatar
Guolin Ke committed
16
17
18
19
}
\arguments{
\item{dataset}{\code{lgb.Dataset} object, training data}

20
21
22
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object,
a character representing a path to a text file (CSV, TSV, or LibSVM),
or a character representing a path to a binary \code{Dataset} file}
Guolin Ke's avatar
Guolin Ke committed
23

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
\item{label}{vector of labels to use as the target variable}

\item{weight}{numeric vector of sample weights}

\item{group}{used for learning-to-rank tasks. An integer vector describing how to
group rows together as ordered results from the same set of candidate results
to be ranked. For example, if you have a 100-document dataset with
\code{group = c(10, 20, 40, 10, 10, 10)}, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the
second group, etc.}

\item{init_score}{initial score is the base prediction lightgbm will boost from}

\item{params}{a list of parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#dataset-parameters}{
The "Dataset Parameters" section of the documentation} for a list of parameters
and valid values. If this is an empty list (the default), the validation Dataset
will have the same parameters as the Dataset passed to argument \code{dataset}.}
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
47
48
49
}
\value{
constructed dataset
}
\description{
Construct validation data according to training data
}
\examples{
50
\donttest{
Guolin Ke's avatar
Guolin Ke committed
51
52
53
54
55
56
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

# parameters can be changed between the training data and validation set,
# for example to account for training data in a text file with a header row
# and validation data in a text file without it
train_file <- tempfile(pattern = "train_", fileext = ".csv")
write.table(
  data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
  , file = train_file
  , sep = ","
  , col.names = TRUE
  , row.names = FALSE
  , quote = FALSE
)

valid_file <- tempfile(pattern = "valid_", fileext = ".csv")
write.table(
  data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
  , file = valid_file
  , sep = ","
  , col.names = FALSE
  , row.names = FALSE
  , quote = FALSE
)

dtrain <- lgb.Dataset(
  data = train_file
  , params = list(has_header = TRUE)
)
dtrain$construct()

dvalid <- lgb.Dataset(
  data = valid_file
  , params = list(has_header = FALSE)
)
dvalid$construct()
92
}
Guolin Ke's avatar
Guolin Ke committed
93
}