lgb.Dataset.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset}
\alias{lgb.Dataset}
\title{Construct \code{lgb.Dataset} object}
\usage{
lgb.Dataset(
  data,
  params = list(),
  reference = NULL,
  colnames = NULL,
  categorical_feature = NULL,
  free_raw_data = TRUE,
  info = list(),
  label = NULL,
  weight = NULL,
  group = NULL,
  init_score = NULL,
  ...
)
}
\arguments{
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object,
a character representing a path to a text file (CSV, TSV, or LibSVM),
or a character representing a path to a binary \code{lgb.Dataset} file}

\item{params}{a list of parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#dataset-parameters}{
The "Dataset Parameters" section of the documentation} for a list of parameters
and valid values.}

\item{reference}{reference dataset. When LightGBM creates a Dataset, it does some preprocessing like binning
continuous features into histograms. If you want to apply the same bin boundaries from an existing
dataset to new \code{data}, pass that existing Dataset to this argument.}

\item{colnames}{names of columns}

\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}

\item{free_raw_data}{LightGBM constructs its data format, called a "Dataset", from tabular data.
By default, that Dataset object on the R side does not keep a copy of the raw data.
This reduces LightGBM's memory consumption, but it means that the Dataset object
cannot be changed after it has been constructed. If you'd prefer to be able to
change the Dataset object after construction, set \code{free_raw_data = FALSE}.}

\item{info}{a list of information of the \code{lgb.Dataset} object. NOTE: use of \code{info}
is deprecated as of v3.3.0. Use keyword arguments (e.g. \code{init_score = init_score})
directly.}

\item{label}{vector of labels to use as the target variable}

\item{weight}{numeric vector of sample weights}

\item{group}{used for learning-to-rank tasks. An integer vector describing how to
group rows together as ordered results from the same set of candidate results
to be ranked. For example, if you have a 100-document dataset with
\code{group = c(10, 20, 40, 10, 10, 10)}, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the
second group, etc.}

\item{init_score}{initial score is the base prediction lightgbm will boost from}

\item{...}{other parameters passed to \code{params}}
}
\value{
constructed dataset
}
\description{
Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
             or local file (that was created previously by saving an \code{lgb.Dataset}).
}
\examples{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data_file <- tempfile(fileext = ".data")
lgb.Dataset.save(dtrain, data_file)
dtrain <- lgb.Dataset(data_file)
lgb.Dataset.construct(dtrain)
}
}