Commit 4f232570 authored by Guolin Ke's avatar Guolin Ke
Browse files

Merge branch 'master' of https://github.com/Microsoft/LightGBM

parents 9962e6d6 1b0c2742
...@@ -17,6 +17,7 @@ export(lgb.Dataset.set.reference) ...@@ -17,6 +17,7 @@ export(lgb.Dataset.set.reference)
export(lgb.cv) export(lgb.cv)
export(lgb.dump) export(lgb.dump)
export(lgb.get.eval.result) export(lgb.get.eval.result)
export(lgb.importance)
export(lgb.load) export(lgb.load)
export(lgb.model.dt.tree) export(lgb.model.dt.tree)
export(lgb.save) export(lgb.save)
...@@ -28,4 +29,5 @@ import(methods) ...@@ -28,4 +29,5 @@ import(methods)
importFrom(R6,R6Class) importFrom(R6,R6Class)
importFrom(data.table,":=") importFrom(data.table,":=")
importFrom(magrittr,"%>%") importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
useDynLib(lightgbm) useDynLib(lightgbm)
#' Compute feature importance in a model
#'
#' Creates a \code{data.table} of feature importances in a model.
#'
#' @param model object of class \code{lgb.Booster}.
#' @param percentage whether to show importance in relative percentage.
#'
#' @return
#'
#' For a tree model, a \code{data.table} with the following columns:
#' \itemize{
#'   \item \code{Feature} Feature names in the model.
#'   \item \code{Gain} The total gain of this feature's splits.
#'   \item \code{Cover} The number of observations related to this feature.
#'   \item \code{Frequency} The number of times a feature split in trees.
#' }
#'
#' @examples
#'
#' data(agaricus.train, package = 'lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' params <- list(objective = "binary",
#'                learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#'                min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
#'
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
#'
#' @importFrom magrittr %>% %T>%
#' @importFrom data.table :=
#' @export
lgb.importance <- function(model, percentage = TRUE) {

  # inherits() is the robust way to test class membership; comparing
  # class(model) == "..." breaks when the object carries multiple classes
  if (!inherits(model, "lgb.Booster")) {
    stop("'model' has to be an object of class lgb.Booster")
  }

  # Flatten the booster into a per-node data.table (one row per tree node)
  tree_dt <- lgb.model.dt.tree(model)

  # Keep internal (split) nodes only, aggregate Gain/Cover/Frequency per
  # feature, rename the key column, then order by decreasing Gain.
  # %T>% is used for setnames() because it modifies by reference and
  # returns invisibly.
  tree_imp <- tree_dt %>%
    magrittr::extract(.,
                      i = !is.na(split_index),
                      j = .(Gain = sum(split_gain), Cover = sum(internal_count), Frequency = .N),
                      by = "split_feature") %T>%
    data.table::setnames(., old = "split_feature", new = "Feature") %>%
    magrittr::extract(., i = order(Gain, decreasing = TRUE))

  # Optionally rescale each column so it sums to 1 (in-place := update)
  if (percentage) {
    tree_imp[, ":="(Gain = Gain / sum(Gain),
                    Cover = Cover / sum(Cover),
                    Frequency = Frequency / sum(Frequency))]
  }

  return(tree_imp)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.importance.R
\name{lgb.importance}
\alias{lgb.importance}
\title{Compute feature importance in a model}
\usage{
lgb.importance(model, percentage = TRUE)
}
\arguments{
\item{model}{object of class \code{lgb.Booster}.}
\item{percentage}{whether to show importance in relative percentage.}
}
\value{
For a tree model, a \code{data.table} with the following columns:
\itemize{
\item \code{Feature} Feature names in the model.
\item \code{Gain} The total gain of this feature's splits.
\item \code{Cover} The number of observations related to this feature.
\item \code{Frequency} The number of times a feature split in trees.
}
}
\description{
Creates a \code{data.table} of feature importances in a model.
}
\examples{
data(agaricus.train, package = 'lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "binary",
learning_rate = 0.01, num_leaves = 63, max_depth = -1,
min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
model <- lgb.train(params, dtrain, 20)
tree_imp1 <- lgb.importance(model, percentage = TRUE)
tree_imp2 <- lgb.importance(model, percentage = FALSE)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment