Commit 551d59ca authored by Guolin Ke's avatar Guolin Ke Committed by GitHub

R package (#168)

* finish R's c_api

* clean code

* fix sizeof pointer on 32-bit systems.

* add predictor class

* add Dataset class

* format code

* add booster

* add type check for expose function

* add a simple callback

* add all callbacks

* finish the basic training logic

* update docs

* add a simple training interface

* add basic test

* adapt the changes in c_api

* add test for Dataset

* add test for custom obj/eval functions

* fix python test

* fix bug in metadata init

* fix R CMD check
parent acbd4f34
Package: lightgbm
Type: Package
Title: Light Gradient Boosting Machine
Version: 0.1
Date: 2016-12-29
Author: Guolin Ke <guolin.ke@microsoft.com>
Maintainer: Guolin Ke <guolin.ke@microsoft.com>
Description: LightGBM is a gradient boosting framework that uses tree based learning algorithms.
It is designed to be distributed and efficient with the following advantages:
1. Faster training speed and higher efficiency.
2. Lower memory usage.
3. Better accuracy.
4. Parallel learning supported.
5. Capable of handling large-scale data.
License: The MIT License (MIT) | file LICENSE
URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat,
igraph (>= 1.0.1),
methods,
data.table (>= 1.9.6),
magrittr (>= 1.5),
stringi (>= 0.5.2)
Depends:
R (>= 3.0),
R6
Imports:
Matrix (>= 1.1-0)
RoxygenNote: 5.0.1
The MIT License (MIT)
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Generated by roxygen2: do not edit by hand
S3method("dimnames<-",lgb.Dataset)
S3method(dim,lgb.Dataset)
S3method(dimnames,lgb.Dataset)
S3method(getinfo,lgb.Dataset)
S3method(predict,lgb.Booster)
S3method(setinfo,lgb.Dataset)
S3method(slice,lgb.Dataset)
export(getinfo)
export(lgb.Dataset)
export(lgb.Dataset.construct)
export(lgb.Dataset.create.valid)
export(lgb.Dataset.save)
export(lgb.Dataset.set.categorical)
export(lgb.Dataset.set.reference)
export(lgb.dump)
export(lgb.get.eval.result)
export(lgb.load)
export(lgb.save)
export(lgb.train)
export(lightgbm)
export(setinfo)
export(slice)
importFrom(R6,R6Class)
useDynLib(lightgbm)
CB_ENV <- R6Class(
"lgb.cb_env",
cloneable=FALSE,
public = list(
model=NULL,
iteration=NULL,
begin_iteration=NULL,
end_iteration=NULL,
eval_list=list(),
eval_err_list=list(),
best_iter=-1,
met_early_stop=FALSE
)
)
cb.reset.parameters <- function(new_params) {
if (typeof(new_params) != "list")
stop("'new_params' must be a list")
pnames <- gsub("\\.", "_", names(new_params))
nrounds <- NULL
# run some checks at the beginning
init <- function(env) {
nrounds <<- env$end_iteration - env$begin_iteration + 1
if (is.null(env$model))
stop("Env should has 'model'")
# Some parameters are not allowed to be changed,
# since changing them would simply wreck some chaos
not_allowed <- pnames %in%
c('num_class', 'metric', 'boosting_type')
if (any(not_allowed))
stop('Parameters ', paste(pnames[not_allowed], collapse = ", "), " cannot be changed during boosting.")
for (n in pnames) {
p <- new_params[[n]]
if (is.function(p)) {
if (length(formals(p)) != 2)
stop("Parameter '", n, "' is a function but not of two arguments")
} else if (is.numeric(p) || is.character(p)) {
if (length(p) != nrounds)
stop("Length of '", n, "' has to be equal to 'nrounds'")
} else {
stop("Parameter '", n, "' is not a function or a vector")
}
}
}
callback <- function(env) {
if (is.null(nrounds))
init(env)
i <- env$iteration - env$begin_iteration + 1
pars <- lapply(new_params, function(p) {
if (is.function(p))
return(p(i, nrounds))
p[i]
})
# to-do check pars
if (!is.null(env$model)) {
env$model$reset_parameter(pars)
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'is_pre_iteration') <- TRUE
attr(callback, 'name') <- 'cb.reset.parameters'
return(callback)
}
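# Usage sketch (illustrative only: this callback is not exported in the
# NAMESPACE above, so it is shown here as it would be wired in through
# lgb.train()'s 'callbacks' argument). Each element of 'new_params' is either
# a function of (iteration, nrounds) or a vector with one value per round;
# the names 'lr_decay', 'cbs', 'params', 'dtrain' and 'model' are hypothetical.
#
#   lr_decay <- function(iter, nrounds) 0.1 * 0.99 ^ iter
#   cbs <- list(cb.reset.parameters(list(learning_rate = lr_decay)))
#   # model <- lgb.train(params, dtrain, nrounds = 10, callbacks = cbs)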
# Format the evaluation metric string
format.eval.string <- function(eval_res, eval_err=NULL) {
if (is.null(eval_res))
stop('no evaluation results')
if (length(eval_res) == 0)
stop('no evaluation results')
if (!is.null(eval_err)) {
res <- sprintf('%s\'s %s:%g+%g', eval_res$data_name, eval_res$name, eval_res$value, eval_err)
} else {
res <- sprintf('%s\'s %s:%g', eval_res$data_name, eval_res$name, eval_res$value)
}
return(res)
}
merge.eval.string <- function(env){
if(length(env$eval_list) <= 0){
return("")
}
msg <- list(sprintf('[%d]:',env$iteration))
is_eval_err <- FALSE
if(length(env$eval_err_list) > 0){
is_eval_err <- TRUE
}
for(j in 1:length(env$eval_list)) {
eval_err <- NULL
if(is_eval_err){
eval_err <- env$eval_err_list[[j]]
}
msg <- c(msg, format.eval.string(env$eval_list[[j]],eval_err))
}
return(paste0(msg, collapse='\t'))
}
cb.print.evaluation <- function(period=1){
callback <- function(env){
if(period > 0){
i <- env$iteration
if( (i - 1) %% period == 0
| i == env$begin_iteration
| i == env$end_iteration ){
cat(merge.eval.string(env), "\n")
}
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.print.evaluation'
return(callback)
}
cb.record.evaluation <- function() {
callback <- function(env){
if(length(env$eval_list) <= 0) return()
is_eval_err <- FALSE
if(length(env$eval_err_list) > 0){
is_eval_err <- TRUE
}
if(length(env$model$record_evals) == 0){
for(j in 1:length(env$eval_list)) {
data_name <- env$eval_list[[j]]$data_name
name <- env$eval_list[[j]]$name
env$model$record_evals$start_iter <- env$begin_iteration
if(is.null(env$model$record_evals[[data_name]])){
env$model$record_evals[[data_name]] <- list()
}
env$model$record_evals[[data_name]][[name]] <- list()
env$model$record_evals[[data_name]][[name]]$eval <- list()
env$model$record_evals[[data_name]][[name]]$eval_err <- list()
}
}
for(j in 1:length(env$eval_list)) {
eval_res <- env$eval_list[[j]]
eval_err <- NULL
if(is_eval_err){
eval_err <- env$eval_err_list[[j]]
}
data_name <- eval_res$data_name
name <- eval_res$name
env$model$record_evals[[data_name]][[name]]$eval <- c(env$model$record_evals[[data_name]][[name]]$eval, eval_res$value)
env$model$record_evals[[data_name]][[name]]$eval_err <- c(env$model$record_evals[[data_name]][[name]]$eval_err, eval_err)
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.record.evaluation'
return(callback)
}
cb.early.stop <- function(stopping_rounds, verbose=TRUE) {
# state variables
factor_to_bigger_better <- NULL
best_iter <- NULL
best_score <- NULL
best_msg <- NULL
eval_len <- NULL
init <- function(env) {
eval_len <<- length(env$eval_list)
if (eval_len == 0)
stop("For early stopping, valids must have at least one element")
if (verbose)
cat("Will train until hasn't improved in ",
stopping_rounds, " rounds.\n\n", sep = '')
factor_to_bigger_better <<- rep(1.0, eval_len)
best_iter <<- rep(-1, eval_len)
best_score <<- rep(-Inf, eval_len)
best_msg <<- list()
for(i in 1:eval_len){
best_msg <<- c(best_msg, "")
if(!env$eval_list[[i]]$higher_better){
factor_to_bigger_better[i] <<- -1.0
}
}
}
callback <- function(env, finalize = FALSE) {
if (is.null(eval_len))
init(env)
cur_iter <- env$iteration
for(i in 1:eval_len){
score <- env$eval_list[[i]]$value * factor_to_bigger_better[i]
if(score > best_score[i]){
best_score[i] <<- score
best_iter[i] <<- cur_iter
if(verbose){
best_msg[[i]] <<- as.character(merge.eval.string(env))
}
} else {
if(cur_iter - best_iter[i] >= stopping_rounds){
if(!is.null(env$model)){
env$model$best_iter <- best_iter[i]
}
if(verbose){
cat('Early stopping, best iteration is:',"\n")
cat(best_msg[[i]],"\n")
}
env$best_iter <- best_iter[i]
env$met_early_stop <- TRUE
}
}
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.early.stop'
return(callback)
}
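# Usage sketch: lgb.train() (defined later in this package) adds this callback
# automatically when 'early_stopping_rounds' is given, so it is normally not
# called directly. After early stopping the best iteration is stored in
# model$best_iter and is used by default in save_model(), dump_model() and
# predict() when 'num_iteration' is NULL. Object names below are illustrative.
#
#   # model <- lgb.train(params, dtrain, 100, valids = list(test = dtest),
#   #                    early_stopping_rounds = 10)
#   # model$best_iter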
# Extract callback names from the list of callbacks
callback.names <- function(cb_list) {
unlist(lapply(cb_list, function(x) attr(x, 'name')))
}
add.cb <- function(cb_list, cb) {
cb_list <- c(cb_list, cb)
names(cb_list) <- callback.names(cb_list)
if ('cb.early.stop' %in% names(cb_list)) {
cb_list <- c(cb_list, cb_list['cb.early.stop'])
# this removes only the first one
cb_list['cb.early.stop'] <- NULL
}
if ('cb.cv.predict' %in% names(cb_list)) {
cb_list <- c(cb_list, cb_list['cb.cv.predict'])
cb_list['cb.cv.predict'] <- NULL
}
cb_list
}
categorize.callbacks <- function(cb_list) {
list(
pre_iter = Filter(function(x) {
pre <- attr(x, 'is_pre_iteration')
!is.null(pre) && pre
}, cb_list),
post_iter = Filter(function(x) {
pre <- attr(x, 'is_pre_iteration')
is.null(pre) || !pre
}, cb_list)
)
}
Booster <- R6Class(
"lgb.Booster",
cloneable=FALSE,
public = list(
best_iter = -1,
record_evals = list(),
finalize = function() {
if(!lgb.is.null.handle(private$handle)){
print("free booster handle")
lgb.call("LGBM_BoosterFree_R", ret=NULL, private$handle)
private$handle <- NULL
}
},
initialize = function(params = list(),
train_set = NULL,
modelfile = NULL,
...) {
params <- append(params, list(...))
params_str <- lgb.params2str(params)
handle <- lgb.new.handle()
if (!is.null(train_set)) {
if (!lgb.check.r6.class(train_set, "lgb.Dataset")) {
stop("lgb.Booster: Only can use lgb.Dataset as training data")
}
handle <-
lgb.call("LGBM_BoosterCreate_R", ret=handle, train_set$.__enclos_env__$private$get_handle(), params_str)
private$train_set <- train_set
private$num_dataset <- 1
private$init_predictor <- train_set$.__enclos_env__$private$predictor
if (!is.null(private$init_predictor)) {
lgb.call("LGBM_BoosterMerge_R", ret=NULL,
handle,
private$init_predictor$.__enclos_env__$private$handle)
}
private$is_predicted_cur_iter <-
c(private$is_predicted_cur_iter, FALSE)
} else if (!is.null(modelfile)) {
if (!is.character(modelfile)) {
stop("lgb.Booster: Only can use string as model file path")
}
handle <-
lgb.call("LGBM_BoosterCreateFromModelfile_R",
ret=handle,
lgb.c_str(modelfile))
} else {
stop(
"lgb.Booster: Need at least one training dataset or model file to create booster instance"
)
}
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
private$num_class <- as.integer(1)
private$num_class <-
lgb.call("LGBM_BoosterGetNumClasses_R", ret=private$num_class, private$handle)
},
set_train_data_name = function(name) {
private$name_train_set <- name
return(self)
},
add_valid = function(data, name) {
if (!lgb.check.r6.class(data, "lgb.Dataset")) {
stop("lgb.Booster.add_valid: Only can use lgb.Dataset as validation data")
}
if (!identical(data$.__enclos_env__$private$predictor, private$init_predictor)) {
stop(
"lgb.Booster.add_valid: Add validation data failed, you should use same predictor for these data"
)
}
if(!is.character(name)){
stop("only can use character as data name")
}
lgb.call("LGBM_BoosterAddValidData_R", ret=NULL, private$handle, data$.__enclos_env__$private$get_handle())
private$valid_sets <- c(private$valid_sets, data)
private$name_valid_sets <- c(private$name_valid_sets, name)
private$num_dataset <- private$num_dataset + 1
private$is_predicted_cur_iter <-
c(private$is_predicted_cur_iter, FALSE)
return(self)
},
reset_parameter = function(params, ...) {
params <- append(params, list(...))
params_str <- lgb.params2str(params)
lgb.call("LGBM_BoosterResetParameter_R", ret=NULL,
private$handle,
params_str)
return(self)
},
update = function(train_set = NULL, fobj = NULL) {
if (!is.null(train_set)) {
if (!lgb.check.r6.class(train_set, "lgb.Dataset")) {
stop("lgb.Booster.update: Only can use lgb.Dataset as training data")
}
if (!identical(train_set$.__enclos_env__$private$predictor, private$init_predictor)) {
stop(
"lgb.Booster.update: Change train_set failed, you should use same predictor for these data"
)
}
lgb.call("LGBM_BoosterResetTrainingData_R", ret=NULL,
private$handle,
train_set$.__enclos_env__$private$get_handle())
private$train_set <- train_set
}
if (is.null(fobj)) {
ret <-
lgb.call("LGBM_BoosterUpdateOneIter_R", ret=NULL, private$handle)
} else {
if (typeof(fobj) != 'closure') {
stop("lgb.Booster.update: fobj should be a function")
}
gpair <- fobj(private$inner_predict(1), private$train_set)
ret <-
lgb.call(
"LGBM_BoosterUpdateOneIterCustom_R", ret=NULL,
private$handle,
gpair$grad,
gpair$hess,
length(gpair$grad)
)
}
for (i in 1:length(private$is_predicted_cur_iter)) {
private$is_predicted_cur_iter[[i]] <- FALSE
}
return(ret)
},
rollback_one_iter = function() {
lgb.call("LGBM_BoosterRollbackOneIter_R", ret=NULL, private$handle)
for (i in 1:length(private$is_predicted_cur_iter)) {
private$is_predicted_cur_iter[[i]] <- FALSE
}
return(self)
},
current_iter = function() {
cur_iter <- as.integer(0)
return(lgb.call("LGBM_BoosterGetCurrentIteration_R", ret=cur_iter, private$handle))
},
eval = function(data, name, feval = NULL) {
if (!lgb.check.r6.class(data, "lgb.Dataset")) {
stop("lgb.Booster.eval: only can use lgb.Dataset to eval")
}
data_idx <- 0
if (identical(data, private$train_set)) {
data_idx <- 1
} else {
if(length(private$valid_sets) > 0){
for (i in 1:length(private$valid_sets)) {
if (identical(data, private$valid_sets[[i]])) {
data_idx <- i + 1
break
}
}
}
}
if (data_idx == 0) {
self$add_valid(data, name)
data_idx <- private$num_dataset
}
return(private$inner_eval(name, data_idx, feval))
},
eval_train = function(feval = NULL) {
return(private$inner_eval(private$name_train_set, 1, feval))
},
eval_valid = function(feval = NULL) {
ret <- list()
if(length(private$valid_sets) <= 0) return(ret)
for (i in 1:length(private$valid_sets)) {
ret <-
append(ret, private$inner_eval(private$name_valid_sets[[i]], i + 1, feval))
}
return(ret)
},
save_model = function(filename, num_iteration = NULL) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
lgb.call(
"LGBM_BoosterSaveModel_R",
ret = NULL,
private$handle,
as.integer(num_iteration),
lgb.c_str(filename)
)
return(self)
},
dump_model = function(num_iteration = NULL) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
return(
lgb.call.return.str(
"LGBM_BoosterDumpModel_R",
private$handle,
as.integer(num_iteration)
)
)
},
predict = function(data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
header = FALSE,
reshape = FALSE) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
predictor <- Predictor$new(private$handle)
return(predictor$predict(data, num_iteration, rawscore, predleaf, header, reshape))
},
to_predictor = function() {
Predictor$new(private$handle)
}
),
private = list(
handle = NULL,
train_set = NULL,
name_train_set = "training",
valid_sets = list(),
name_valid_sets = list(),
predict_buffer = list(),
is_predicted_cur_iter = list(),
num_class = 1,
num_dataset = 0,
init_predictor = NULL,
eval_names = NULL,
higher_better_inner_eval = NULL,
inner_predict = function(idx) {
data_name <- private$name_train_set
if(idx > 1){
data_name <- private$name_valid_sets[[idx - 1]]
}
if (idx > private$num_dataset) {
stop("data_idx should not be greater than num_dataset")
}
if (is.null(private$predict_buffer[[data_name]])) {
npred <- as.integer(0)
npred <-
lgb.call("LGBM_BoosterGetNumPredict_R",
ret = npred,
private$handle,
as.integer(idx - 1))
private$predict_buffer[[data_name]] <- rep(0.0, npred)
}
if (!private$is_predicted_cur_iter[[idx]]) {
private$predict_buffer[[data_name]] <-
lgb.call(
"LGBM_BoosterGetPredict_R",
ret=private$predict_buffer[[data_name]],
private$handle,
as.integer(idx - 1)
)
private$is_predicted_cur_iter[[idx]] <- TRUE
}
return(private$predict_buffer[[data_name]])
},
get_eval_info = function() {
if (is.null(private$eval_names)) {
names <-
lgb.call.return.str("LGBM_BoosterGetEvalNames_R", private$handle)
if(nchar(names) > 0){
names <- strsplit(names, "\t")[[1]]
private$eval_names <- names
private$higher_better_inner_eval <-
rep(FALSE, length(names))
for (i in 1:length(names)) {
if (startsWith(names[i], "auc") |
startsWith(names[i], "ndcg")) {
private$higher_better_inner_eval[i] <- TRUE
}
}
}
}
return(private$eval_names)
},
inner_eval = function(data_name, data_idx, feval = NULL) {
if (data_idx > private$num_dataset) {
stop("data_idx should not be greater than num_dataset")
}
private$get_eval_info()
ret <- list()
if (length(private$eval_names) > 0) {
tmp_vals <- rep(0.0, length(private$eval_names))
tmp_vals <-
lgb.call("LGBM_BoosterGetEval_R", ret=tmp_vals,
private$handle,
as.integer(data_idx - 1))
for (i in 1:length(private$eval_names)) {
res <- list()
res$data_name <- data_name
res$name <- private$eval_names[i]
res$value <- tmp_vals[i]
res$higher_better <- private$higher_better_inner_eval[i]
ret <- append(ret, list(res))
}
}
if (!is.null(feval)) {
if (typeof(feval) != 'closure') {
stop("lgb.Booster.eval: feval should be a function")
}
data <- private$train_set
if (data_idx > 1) {
data <- private$valid_sets[[data_idx - 1]]
}
res <- feval(private$inner_predict(data_idx), data)
res$data_name <- data_name
ret <- append(ret, list(res))
}
return(ret)
}
)
)
# internal helper method
lgb.is.Booster <- function(x){
if(lgb.check.r6.class(x, "lgb.Booster")){
return(TRUE)
} else{
return(FALSE)
}
}
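# A minimal custom objective sketch: Booster$update(fobj = ...) and
# lgb.train(obj = ...) call the function with the current raw predictions and
# the training lgb.Dataset, and expect a list with per-row 'grad' and 'hess'.
# Logistic loss is used here purely as an illustration; the name 'logregobj'
# is not part of this package.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  probs <- 1 / (1 + exp(-preds))   # sigmoid of the raw scores
  grad <- probs - labels           # first-order gradient
  hess <- probs * (1 - probs)      # second-order gradient
  return(list(grad = grad, hess = hess))
}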
#' Predict method for LightGBM model
#'
#' Predicted values based on class \code{lgb.Booster}
#'
#' @param object Object of class \code{lgb.Booster}
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param num_iteration number of iterations to predict with; NULL or <= 0 means use the best iteration
#' @param rawscore whether the prediction should be returned in the form of the original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
#' logistic regression would return predictions for log-odds instead of probabilities.
#' @param predleaf whether to predict leaf indices instead.
#' @param header only used for prediction from a text file; TRUE if the text file has a header
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#' prediction outputs per case.
#' @return
#' For regression or binary classification, it returns a vector of length \code{nrows(data)}.
#' For multiclass classification, either a \code{num_class * nrows(data)} vector or
#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#'
#' When \code{predleaf = TRUE}, the output is a matrix object with the
#' number of columns corresponding to the number of trees.
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' preds <- predict(model, test$data)
#'
#' @rdname predict.lgb.Booster
#' @export
predict.lgb.Booster <- function(object,
data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
header = FALSE,
reshape = FALSE) {
if(!lgb.is.Booster(object)){
stop("predict.lgb.Booster: should input lgb.Booster object")
}
object$predict(data, num_iteration, rawscore, predleaf, header, reshape)
}
#' Load LightGBM model
#'
#' Load LightGBM model from saved model file
#'
#' @param filename path of model file
#'
#' @return booster
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' lgb.save(model, "model.txt")
#' load_booster <- lgb.load("model.txt")
#' @rdname lgb.load
#' @export
lgb.load <- function(filename){
if(!is.character(filename)){
stop("lgb.load: filename should be character")
}
Booster$new(modelfile=filename)
}
#' Save LightGBM model
#'
#' Save LightGBM model
#'
#' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename
#' @param num_iteration number of iterations to save; NULL or <= 0 means use the best iteration
#'
#' @return booster
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' lgb.save(model, "model.txt")
#' @rdname lgb.save
#' @export
lgb.save <- function(booster, filename, num_iteration=NULL){
if(!lgb.is.Booster(booster)){
stop("lgb.save: should input lgb.Booster object")
}
if(!is.character(filename)){
stop("lgb.save: filename should be character")
}
booster$save_model(filename, num_iteration)
}
#' Dump LightGBM model to json
#'
#' Dump LightGBM model to json
#'
#' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iterations to dump; NULL or <= 0 means use the best iteration
#'
#' @return json format of model
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' json_model <- lgb.dump(model)
#' @rdname lgb.dump
#' @export
lgb.dump <- function(booster, num_iteration=NULL){
if(!lgb.is.Booster(booster)){
stop("lgb.dump: should input lgb.Booster object")
}
booster$dump_model(num_iteration)
}
#' Get record evaluation result from booster
#'
#' Get record evaluation result from booster
#' @param booster Object of class \code{lgb.Booster}
#' @param data_name name of dataset
#' @param eval_name name of evaluation
#' @param iters iterations to return; NULL returns all iterations
#' @param is_err if TRUE, return the evaluation error instead
#' @return vector of evaluation result
#'
#' @rdname lgb.get.eval.result
#' @export
lgb.get.eval.result <- function(booster, data_name, eval_name, iters=NULL, is_err=FALSE){
if(!lgb.is.Booster(booster)){
stop("lgb.get.eval.result: only can use booster to get eval result")
}
if(!is.character(data_name) || !is.character(eval_name)){
stop("lgb.get.eval.result: data_name and eval_name should be character")
}
if(is.null(booster$record_evals[[data_name]])){
stop("lgb.get.eval.result: wrong data name")
}
if(is.null(booster$record_evals[[data_name]][[eval_name]])){
stop("lgb.get.eval.result: wrong eval name")
}
result <- booster$record_evals[[data_name]][[eval_name]]$eval
if(is_err){
result <- booster$record_evals[[data_name]][[eval_name]]$eval_err
}
if(is.null(iters)){
return(as.numeric(result))
}
iters <- as.integer(iters)
delta <- booster$record_evals$start_iter - 1
iters <- iters - delta
return(as.numeric(result[iters]))
}
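# Usage sketch (assumes a model trained as in the roxygen examples above, with
# valids = list(test = dtest) and metric = "l2"; object names are illustrative):
#
#   # lgb.get.eval.result(model, "test", "l2")              # all recorded values
#   # lgb.get.eval.result(model, "test", "l2", iters = 5L)  # value at iteration 5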
Predictor <- R6Class(
"lgb.Predictor",
cloneable=FALSE,
public = list(
finalize = function() {
if(private$need_free_handle && !lgb.is.null.handle(private$handle)){
print("free booster handle")
lgb.call("LGBM_BoosterFree_R", ret=NULL, private$handle)
private$handle <- NULL
}
},
initialize = function(modelfile) {
handle <- lgb.new.handle()
if(typeof(modelfile) == "character") {
handle <- lgb.call("LGBM_BoosterCreateFromModelfile_R", ret=handle, lgb.c_str(modelfile))
private$need_free_handle <- TRUE
} else if (inherits(modelfile, "lgb.Booster.handle")) {
handle <- modelfile
private$need_free_handle <- FALSE
} else {
stop("lgb.Predictor: modelfile must be either character filename, or lgb.Booster.handle")
}
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
},
current_iter = function() {
cur_iter <- as.integer(0)
return(lgb.call("LGBM_BoosterGetCurrentIteration_R", ret=cur_iter, private$handle))
},
predict = function(data,
num_iteration = NULL, rawscore = FALSE, predleaf = FALSE, header = FALSE,
reshape = FALSE) {
if (is.null(num_iteration)) {
num_iteration <- -1
}
num_row <- 0
if (typeof(data) == "character") {
tmp_filename <- tempfile(pattern = "lightgbm_")
lgb.call("LGBM_BoosterPredictForFile_R", ret=NULL, private$handle, data, as.integer(header),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration),
lgb.c_str(tmp_filename))
preds <- read.delim(tmp_filename, header=FALSE, sep="\t")
num_row <- nrow(preds)
preds <- as.vector(t(preds))
# delete temp file
if(file.exists(tmp_filename)) { file.remove(tmp_filename) }
} else {
num_row <- nrow(data)
npred <- as.integer(0)
npred <- lgb.call("LGBM_BoosterCalcNumPredict_R", ret=npred,
private$handle,
as.integer(num_row),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
# allocate space for the predictions
preds <- rep(0.0, npred)
if (is.matrix(data)) {
preds <- lgb.call("LGBM_BoosterPredictForMat_R", ret=preds,
private$handle,
data,
as.integer(nrow(data)),
as.integer(ncol(data)),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
} else if (inherits(data, "dgCMatrix")) {
preds <- lgb.call("LGBM_BoosterPredictForCSC_R", ret=preds,
private$handle,
data@p,
data@i,
data@x,
length(data@p),
length(data@x),
nrow(data),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
} else {
stop(paste("predict: does not support to predict from ",
typeof(data)))
}
}
if (length(preds) %% num_row != 0) {
stop("predict: prediction length ", length(preds)," is not multiple of nrows(data) ", num_row)
}
npred_per_case <- length(preds) / num_row
if (reshape && npred_per_case > 1) {
preds <- matrix(preds, ncol = npred_per_case)
}
return(preds)
}
),
private = list(
handle = NULL,
need_free_handle = FALSE
)
)
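# Shape of the returned predictions (a sketch; parameter values are illustrative):
# a single-output model yields a plain vector of length nrow(data). With several
# outputs per row (e.g. objective = "multiclass" with num_class = 3) and
# reshape = TRUE, the vector is reshaped into a matrix with npred_per_case
# (here 3) columns, one row per input row.
#
#   # preds <- predict(model, test$data, reshape = TRUE)  # nrow(test$data) x num_class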
#' Main training logic for LightGBM
#'
#' Main training logic for LightGBM
#'
#' @param params List of parameters
#' @param data a \code{lgb.Dataset} object, used for training
#' @param nrounds number of training rounds
#' @param valids a list of \code{lgb.Dataset} object, used for validation
#' @param obj objective function, can be character or custom objective function
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param verbose verbosity for output;
#' if verbose > 0, evaluation messages will also be recorded in booster$record_evals
#' @param eval_freq evaluation output frequency
#' @param init_model path of a model file or an \code{lgb.Booster} object; training will continue from this model
#' @param colnames feature names; if not NULL, will be used to overwrite the column names of the dataset
#' @param categorical_feature list of strings or integers;
#' integers are treated as column indices,
#' strings as feature names
#' @param early_stopping_rounds int
#' Activates early stopping.
#' Requires at least one validation dataset and one metric.
#' If there is more than one, all of them will be checked.
#' The returned model will have trained (best_iter + early_stopping_rounds) rounds.
#' If early stopping occurs, the model will have a 'best_iter' field.
#' @param callbacks list of callback functions
#' that are applied at each iteration
#' @param ... other parameters, see parameters.md for more information
#' @return a trained booster model \code{lgb.Booster}.
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#'
#' @rdname lgb.train
#' @export
lgb.train <- function(params=list(), data, nrounds=10,
valids=list(),
obj=NULL, eval=NULL,
verbose=1, eval_freq=1L,
init_model=NULL,
colnames=NULL,
categorical_feature=NULL,
early_stopping_rounds=NULL,
callbacks=list(), ...) {
additional_params <- list(...)
params <- append(params, additional_params)
params$verbose <- verbose
params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval)
fobj <- NULL
feval <- NULL
if(typeof(params$objective) == "closure"){
fobj <- params$objective
params$objective <- "NONE"
}
if (typeof(eval) == "closure"){
feval <- eval
}
lgb.check.params(params)
predictor <- NULL
if(is.character(init_model)){
predictor <- Predictor$new(init_model)
} else if(lgb.is.Booster(init_model)) {
predictor <- init_model$to_predictor()
}
begin_iteration <- 1
if(!is.null(predictor)){
begin_iteration <- predictor$current_iter() + 1
}
end_iteration <- begin_iteration + nrounds - 1
# check dataset
if(!lgb.is.Dataset(data)){
stop("lgb.train: data only accepts lgb.Dataset object")
}
if (length(valids) > 0) {
if (typeof(valids) != "list" ||
!all(sapply(valids, lgb.is.Dataset)))
stop("valids must be a list of lgb.Dataset elements")
evnames <- names(valids)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the valids must have a name tag")
}
data$update_params(params)
data$.__enclos_env__$private$set_predictor(predictor)
if(!is.null(colnames)){
data$set_colnames(colnames)
}
data$set_categorical_feature(categorical_feature)
valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
if(length(valids) > 0){
for (key in names(valids)) {
valid_data <- valids[[key]]
if(identical(data, valid_data)){
valid_contain_train <- TRUE
train_data_name <- key
next
}
valid_data$update_params(params)
valid_data$set_reference(data)
reduced_valid_sets[[key]] <- valid_data
}
}
# process callbacks
if(eval_freq > 0){
callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq))
}
if (verbose > 0 && length(valids) > 0) {
callbacks <- add.cb(callbacks, cb.record.evaluation())
}
# Early stopping callback
if (!is.null(early_stopping_rounds)) {
if(early_stopping_rounds > 0){
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds, verbose=verbose))
}
}
cb <- categorize.callbacks(callbacks)
# construct booster
booster <- Booster$new(params=params, train_set=data)
if(valid_contain_train){
booster$set_train_data_name(train_data_name)
}
for (key in names(reduced_valid_sets)) {
booster$add_valid(reduced_valid_sets[[key]], key)
}
# callback env
env <- CB_ENV$new()
env$model <- booster
env$begin_iteration <- begin_iteration
env$end_iteration <- end_iteration
# start training
for(i in begin_iteration:end_iteration){
env$iteration <- i
env$eval_list <- list()
for (f in cb$pre_iter) f(env)
# update one iter
booster$update(fobj=fobj)
# collect eval result
eval_list <- list()
if(length(valids) > 0){
if(valid_contain_train){
eval_list <- append(eval_list, booster$eval_train(feval=feval))
}
eval_list <- append(eval_list, booster$eval_valid(feval=feval))
}
env$eval_list <- eval_list
for (f in cb$post_iter) f(env)
# met early stopping
if(env$met_early_stop) break
}
return(booster)
}
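# A minimal custom evaluation sketch: lgb.train(eval = ...) calls the function
# with the current predictions and the corresponding lgb.Dataset, and expects a
# list with 'name', 'value' and 'higher_better'. Mean absolute error is used
# here purely as an illustration; the name 'mae_eval' is not part of this package.
mae_eval <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(name = "mae", value = mean(abs(preds - labels)), higher_better = FALSE)
}
#   # model <- lgb.train(params, dtrain, 10, valids, eval = mae_eval)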
# Simple interface for training a lightgbm model.
# Its documentation is combined with lgb.train.
#
#' @rdname lgb.train
#' @export
lightgbm <- function(data, label = NULL, weight = NULL,
params = list(), nrounds=10,
verbose = 1, eval_freq = 1L,
early_stopping_rounds = NULL,
save_name = "lightgbm.model",
init_model = NULL, callbacks = list(), ...) {
dtrain <- lgb.Dataset(data, label=label, weight=weight)
valids <- list()
if (verbose > 0)
valids$train <- dtrain
bst <- lgb.train(params, dtrain, nrounds, valids, verbose = verbose, eval_freq=eval_freq,
early_stopping_rounds = early_stopping_rounds,
init_model = init_model, callbacks = callbacks, ...)
bst$save_model(save_name)
return(bst)
}
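# Quick usage sketch for the simple interface (uses the bundled agaricus data;
# parameter values are illustrative):
#
#   # data(agaricus.train, package = "lightgbm")
#   # bst <- lightgbm(data = agaricus.train$data, label = agaricus.train$label,
#   #                 params = list(objective = "binary"), nrounds = 10)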
#' Training part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' }
#'
#' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
#'
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
#' School of Information and Computer Science.
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.train
#' @usage data(agaricus.train)
#' @format A list containing a label vector, and a dgCMatrix object with 6513
#' rows and 127 variables
NULL
#' Test part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' }
#'
#' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
#'
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
#' School of Information and Computer Science.
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.test
#' @usage data(agaricus.test)
#' @format A list containing a label vector, and a dgCMatrix object with 1611
#' rows and 126 variables
NULL
# Various imports
#' @importFrom R6 R6Class
#' @useDynLib lightgbm
NULL
lgb.new.handle <- function() {
# use a 64-bit double to store the handle address
return(0.0)
}
lgb.is.null.handle <- function(x) {
if (is.null(x)) {
return(TRUE)
}
if (x == 0) {
return(TRUE)
}
return(FALSE)
}
lgb.encode.char <- function(arr, len) {
if (typeof(arr) != "raw") {
stop("lgb.encode.char: only can encode from raw type")
}
return(rawToChar(arr[1:len]))
}
lgb.call <- function(fun_name, ret, ...) {
call_state <- as.integer(0)
if (!is.null(ret)) {
call_state <-
.Call(fun_name, ..., ret, call_state , PACKAGE = "lightgbm")
} else {
call_state <- .Call(fun_name, ..., call_state , PACKAGE = "lightgbm")
}
if (call_state != as.integer(0)) {
buf_len <- as.integer(200)
act_len <- as.integer(0)
err_msg <- raw(buf_len)
err_msg <-
.Call("LGBM_GetLastError_R", buf_len, act_len, err_msg, PACKAGE = "lightgbm")
if (act_len > buf_len) {
buf_len <- act_len
err_msg <- raw(buf_len)
err_msg <-
.Call("LGBM_GetLastError_R",
buf_len,
act_len,
err_msg,
PACKAGE = "lightgbm")
}
stop(paste0("api error: ", lgb.encode.char(err_msg, act_len)))
}
return(ret)
}
lgb.call.return.str <- function(fun_name, ...) {
buf_len <- as.integer(1024 * 1024)
act_len <- as.integer(0)
buf <- raw(buf_len)
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
if (act_len > buf_len) {
buf_len <- act_len
buf <- raw(buf_len)
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
}
return(lgb.encode.char(buf, act_len))
}
lgb.params2str <- function(params, ...) {
if (typeof(params) != "list")
stop("params must be a list")
names(params) <- gsub("\\.", "_", names(params))
# merge parameters from the params and the dots-expansion
dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params))
if (length(intersect(names(params),
names(dot_params))) > 0)
stop(
"Same parameters in 'params' and in the call are not allowed. Please check your 'params' list."
)
params <- c(params, dot_params)
ret <- list()
for (key in names(params)) {
# join multi value first
val <- paste0(params[[key]], collapse = ",")
if(nchar(val) <= 0) next
# join key value
pair <- paste0(c(key, val), collapse = "=")
ret <- c(ret, pair)
}
if (length(ret) == 0) {
return(lgb.c_str(""))
} else{
return(lgb.c_str(paste0(ret, collapse = " ")))
}
}
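# Example of the conversion (a sketch of the resulting string, not executed here):
#
#   # lgb.params2str(list(learning.rate = 0.1, num_leaves = 31, metric = c("l2", "auc")))
#   # -> NUL-terminated raw bytes of "learning_rate=0.1 num_leaves=31 metric=l2,auc"
#   #    (dots become underscores, multi-valued parameters are joined with commas,
#   #     key=value pairs with spaces)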
lgb.c_str <- function(x) {
ret <- charToRaw(as.character(x))
ret <- c(ret, as.raw(0))
return(ret)
}
lgb.check.r6.class <- function(object, name) {
if (!("R6" %in% class(object))) {
return(FALSE)
}
if (!(name %in% class(object))) {
return(FALSE)
}
return(TRUE)
}
lgb.check.params <- function(params){
# To-do
return(params)
}
lgb.check.obj <- function(params, obj) {
if(!is.null(obj)){
params$objective <- obj
}
if(is.character(params$objective)){
if(!(params$objective %in% c("regression", "binary", "multiclass", "lambdarank"))){
stop("lgb.check.obj: objective name error should be (regression, binary, multiclass, lambdarank)")
}
} else if(typeof(params$objective) != "closure"){
stop("lgb.check.obj: objective should be character or function")
}
return(params)
}
lgb.check.eval <- function(params, eval) {
if(is.null(params$metric)){
params$metric <- list()
}
if(!is.null(eval)){
# append metric
if(is.character(eval) || is.list(eval)){
params$metric <- append(params$metric, eval)
}
}
if (typeof(eval) != "closure"){
if(is.null(params$metric) || length(params$metric) == 0) {
# add default metric
if(is.character(params$objective)){
if(params$objective == "regression"){
params$metric <- "l2"
} else if(params$objective == "binary"){
params$metric <- "binary_logloss"
} else if(params$objective == "multiclass"){
params$metric <- "multi_logloss"
} else if(params$objective == "lambdarank"){
params$metric <- "ndcg"
}
}
}
}
return(params)
}
LightGBM R Package
==================
Installation
------------
```
cd R-package
R CMD INSTALL --build .
```
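
Examples
--------

A minimal training sketch, assuming the package installed above and its bundled
`agaricus` example data (parameter values are illustrative):

```
library(lightgbm)
data(agaricus.train, package='lightgbm')
dtrain <- lgb.Dataset(agaricus.train$data, label=agaricus.train$label)
model <- lgb.train(list(objective="binary", metric="binary_logloss"), dtrain, 10)
```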
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\docType{data}
\name{agaricus.test}
\alias{agaricus.test}
\title{Test part from Mushroom Data Set}
\format{A list containing a label vector, and a dgCMatrix object with 1611
rows and 126 variables}
\usage{
data(agaricus.test)
}
\description{
This data set is originally from the Mushroom data set,
UCI Machine Learning Repository.
}
\details{
This data set includes the following fields:
\itemize{
\item \code{label} the label for each record
\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
}
}
\references{
https://archive.ics.uci.edu/ml/datasets/Mushroom
Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
}
\keyword{datasets}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\docType{data}
\name{agaricus.train}
\alias{agaricus.train}
\title{Training part from Mushroom Data Set}
\format{A list containing a label vector, and a dgCMatrix object with 6513
rows and 127 variables}
\usage{
data(agaricus.train)
}
\description{
This data set is originally from the Mushroom data set,
UCI Machine Learning Repository.
}
\details{
This data set includes the following fields:
\itemize{
\item \code{label} the label for each record
\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
}
}
\references{
https://archive.ics.uci.edu/ml/datasets/Mushroom
Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
}
\keyword{datasets}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{dim.lgb.Dataset}
\alias{dim.lgb.Dataset}
\title{Dimensions of lgb.Dataset}
\usage{
\method{dim}{lgb.Dataset}(x, ...)
}
\arguments{
\item{x}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
}
\value{
a vector of numbers of rows and of columns
}
\description{
Dimensions of lgb.Dataset
}
\details{
Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{lgb.Dataset} object.
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data))
stopifnot(all(dim(dtrain) == dim(train$data)))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{dimnames.lgb.Dataset}
\alias{dimnames.lgb.Dataset}
\alias{dimnames<-.lgb.Dataset}
\title{Handling of column names of \code{lgb.Dataset}}
\usage{
\method{dimnames}{lgb.Dataset}(x)
\method{dimnames}{lgb.Dataset}(x) <- value
}
\arguments{
\item{x}{object of class \code{lgb.Dataset}}
\item{value}{a list of two elements: the first one is ignored
and the second one is column names}
}
\description{
Handling of column names of \code{lgb.Dataset}
}
\details{
Only column names are supported for \code{lgb.Dataset}, thus setting of
row names would have no effect and the returned row names would be NULL.
Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
dimnames(dtrain)
colnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data))
print(dtrain, verbose=TRUE)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{getinfo}
\alias{getinfo}
\alias{getinfo.lgb.Dataset}
\title{Get information of an lgb.Dataset object}
\usage{
getinfo(dataset, ...)
\method{getinfo}{lgb.Dataset}(dataset, name, ...)
}
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
\item{name}{the name of the information field to get (see details)}
}
\value{
info data
}
\description{
Get information of an lgb.Dataset object
}
\details{
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: the label LightGBM learns from;
\item \code{weight}: weights to do a weight rescale;
\item \code{group}: group size;
\item \code{init_score}: the initial score is the base prediction LightGBM will boost from;
}
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label')
stopifnot(all(labels2 == 1-labels))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset}
\alias{lgb.Dataset}
\title{Construct lgb.Dataset object}
\usage{
lgb.Dataset(data, params = list(), reference = NULL, colnames = NULL,
categorical_feature = NULL, free_raw_data = TRUE, info = list(), ...)
}
\arguments{
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{params}{a list of parameters}
\item{reference}{reference dataset}
\item{colnames}{names of columns}
\item{categorical_feature}{categorical features}
\item{free_raw_data}{TRUE to free the raw data after constructing the dataset}
\item{info}{a list of information of the lgb.Dataset object}
\item{...}{other information to pass to \code{info} or parameters to pass to \code{params}}
}
\value{
constructed dataset
}
\description{
Construct lgb.Dataset object
}
\details{
Construct lgb.Dataset object from a dense matrix, a sparse matrix,
or a local file (that was created previously by saving an \code{lgb.Dataset}).
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.save(dtrain, 'lgb.Dataset.data')
dtrain <- lgb.Dataset('lgb.Dataset.data')
lgb.Dataset.construct(dtrain)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.construct}
\alias{lgb.Dataset.construct}
\title{Construct Dataset explicitly}
\usage{
lgb.Dataset.construct(dataset)
}
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
}
\description{
Construct Dataset explicitly
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
}