[R-package] miscellaneous changes to comply with CRAN requirements (#3338)

* [R-package] update DESCRIPTION per CRAN comments * newlines * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * more fixes * update Rbuildignore * more changes * more changes per CRAN response * add email * run examples in CI * add newest CRAN response * add Solaris patch * update patch * another attempt at ifaddrs patch * fix unnecessary comment * update configure * comments * bump version * tabs * fix address alignment, required by cran (#3415) * fix dataset binary file alignment * many fixes * fix warnings * fix bug * Update file_io.cpp * Update file_io.cpp * simplify code * Apply suggestions from code review * general * remove unneeded alignment * Update file_io.h * int32 to byte8 alignment * Apply suggestions from code review * Apply suggestions from code review * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * fixing conflicts * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * trying to fix conflicts * more fixes * this will work * update cran-comments * simplify solaris, add more testing docs * stuff * remove rchck docs * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * remove extra use of cat() * change solaris check * update docs * remove testing code * fix warning about cleanup not having execute permissions * fix cmake builds * remove blank line Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Guolin Ke <guolin.ke@outlook.com>

[R-package] miscellaneous changes to comply with CRAN requirements (#3338)
* [R-package] update DESCRIPTION per CRAN comments * newlines * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * more fixes * update Rbuildignore * more changes * more changes per CRAN response * add email * run examples in CI * add newest CRAN response * add Solaris patch * update patch * another attempt at ifaddrs patch * fix unnecessary comment * update configure * comments * bump version * tabs * fix address alignment, required by cran (#3415) * fix dataset binary file alignment * many fixes * fix warnings * fix bug * Update file_io.cpp * Update file_io.cpp * simplify code * Apply suggestions from code review * general * remove unneeded alignment * Update file_io.h * int32 to byte8 alignment * Apply suggestions from code review * Apply suggestions from code review * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * fixing conflicts * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * trying to fix conflicts * more fixes * this will work * update cran-comments * simplify solaris, add more testing docs * stuff * remove rchck docs * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * remove extra use of cat() * change solaris check * update docs * remove testing code * fix warning about cleanup not having execute permissions * fix cmake builds * remove blank line Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
186711de · James Lamb · GitHub · 7a51ae04 · 186711de · 186711de
Unverified Commit 186711de authored Oct 08, 2020 by James Lamb Committed by GitHub Oct 07, 2020
20 changed files
--- a/.ci/lint_r_code.R
+++ b/.ci/lint_r_code.R
@@ -53,7 +53,8 @@ LINTERS_TO_USE <- list(
    , "true_false"           = lintr::T_and_F_symbol_linter
    , "undesirable_function" = lintr::undesirable_function_linter(
        fun = c(
-            "cbind" = paste0(
+            "cat" = "CRAN forbids the use of cat() in packages except in special cases. Use message() or warning()."
+            , "cbind" = paste0(
                "cbind is an unsafe way to build up a data frame. merge() or direct "
                , "column assignment is preferred."
            )
@@ -85,7 +86,7 @@ LINTERS_TO_USE <- list(
    , "unneeded_concatenation" = lintr::unneeded_concatenation_linter
 )

-cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
+print(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))

 results <- NULL


--- a/.ci/test_r_package.sh
+++ b/.ci/test_r_package.sh
@@ -165,7 +165,7 @@ check_succeeded="yes"
 (
    R CMD check ${PKG_TARBALL} \
        --as-cran \
-        --run-dontrun \
+        --run-donttest \
    || check_succeeded="no"
 ) &


--- a/.ci/test_r_package_windows.ps1
+++ b/.ci/test_r_package_windows.ps1
@@ -147,9 +147,9 @@ if ($env:COMPILER -ne "MSVC") {
  Write-Output "Running R CMD check"
  if ($env:R_BUILD_TYPE -eq "cran") {
    # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit)
-    $check_args = "c('CMD', 'check', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
+    $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
  } else {
-    $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
+    $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
  }
  Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $?


--- a/R-package/.Rbuildignore
+++ b/R-package/.Rbuildignore
@@ -3,7 +3,7 @@ AUTOCONF_UBUNTU_VERSION
 ^.*\.bin
 ^build_r.R$
 ^cran-comments\.md$
-^docs/.*$
+^docs$
 ^.*\.dll
 \.gitkeep$
 ^.*\.history

--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -6,11 +6,25 @@ Date: ~~DATE~~
 Authors@R: c(
    person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")),
    person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")),
-    person("Yachen", "Yan", role = c("ctb")),
    person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")),
+    person("Qi", "Meng", role = c("aut")),
+    person("Thomas", "Finley", role = c("aut")),
+    person("Taifeng", "Wang", role = c("aut")),
+    person("Wei", "Chen", role = c("aut")),
+    person("Weidong", "Ma", role = c("aut")),
+    person("Qiwei", "Ye", role = c("aut")),
+    person("Tie-Yan", "Liu", role = c("aut")),
+    person("Yachen", "Yan", role = c("ctb")),
+    person("Microsoft Corporation", role = c("cph")),
+    person("Dropbox, Inc.", role = c("cph")),
+    person("Jay", "Loden", role = c("cph")),
+    person("Dave", "Daeschler", role = c("cph")),
+    person("Giampaolo", "Rodola", role = c("cph")),
    person("IBM Corporation", role = c("ctb"))
    )
-Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, and this package offers an R interface to work with it.
+Description: Tree-based algorithms can be improved by introducing boosting frameworks.
+    'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) <https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision>.
+    This package offers an R interface to work with it.
    It is designed to be distributed and efficient with the following advantages:
        1. Faster training speed and higher efficiency.
        2. Lower memory usage.

--- a/R-package/LICENSE
+++ b/R-package/LICENSE
-The MIT License (MIT)
-
-Copyright (c) Microsoft Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+YEAR: 2016
+COPYRIGHT HOLDER: Microsoft Corporation
\ No newline at end of file
--- a/R-package/R/callback.R
+++ b/R-package/R/callback.R
@@ -150,7 +150,7 @@ merge.eval.string <- function(env) {

  }

-  paste0(msg, collapse = "\t")
+  paste0(msg, collapse = "  ")

 }

@@ -173,7 +173,7 @@ cb.print.evaluation <- function(period = 1L) {

        # Check if message is existing
        if (nchar(msg) > 0L) {
-          cat(merge.eval.string(env = env), "\n")
+          print(merge.eval.string(env = env))
        }

      }
@@ -284,7 +284,12 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =

    # Check if verbose or not
    if (isTRUE(verbose)) {
-      cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "")
+      msg <- paste0(
+        "Will train until there is no improvement in "
+        , stopping_rounds
+        , " rounds."
+      )
+      print(msg)
    }

    # Internally treat everything as a maximization task
@@ -359,8 +364,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
            # Print message if verbose
            if (isTRUE(verbose)) {

-              cat("Early stopping, best iteration is:", "\n")
-              cat(best_msg[[i]], "\n")
+              print(paste0("Early stopping, best iteration is: ", best_msg[[i]]))

            }

@@ -380,8 +384,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =

        # Print message if verbose
        if (isTRUE(verbose)) {
-          cat("Did not meet early stopping, best iteration is:", "\n")
-          cat(best_msg[[i]], "\n")
+          print(paste0("Did not meet early stopping, best iteration is: ", best_msg[[i]]))
        }

        # Store best iteration and stop

--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -721,7 +721,7 @@ Booster <- R6::R6Class(
 #'         number of columns corresponding to the number of trees.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -780,7 +780,7 @@ predict.lgb.Booster <- function(object,
 #' @return lgb.Booster
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -840,7 +840,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
 #' @return lgb.Booster
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(lightgbm)
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
@@ -886,7 +886,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
 #' @return json format of model
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(lightgbm)
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
@@ -930,10 +930,10 @@ lgb.dump <- function(booster, num_iteration = NULL) {
 #'              (the default), evaluation results for all iterations will be returned.
 #' @param is_err TRUE will return evaluation error instead
 #'
-#' @return vector of evaluation result
+#' @return numeric vector of evaluation result
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' # train a regression model
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train

--- a/R-package/R/lgb.Dataset.R
+++ b/R-package/R/lgb.Dataset.R
@@ -720,7 +720,7 @@ Dataset <- R6::R6Class(
 #' @return constructed dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -766,7 +766,7 @@ lgb.Dataset <- function(data,
 #' @return constructed dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -793,12 +793,13 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
 #' @param dataset Object of class \code{lgb.Dataset}
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
 #' lgb.Dataset.construct(dtrain)
 #' }
+#' @return constructed dataset
 #' @export
 lgb.Dataset.construct <- function(dataset) {

@@ -824,7 +825,7 @@ lgb.Dataset.construct <- function(dataset) {
 #' be directly used with an \code{lgb.Dataset} object.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -858,7 +859,7 @@ dim.lgb.Dataset <- function(x, ...) {
 #' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -869,6 +870,7 @@ dim.lgb.Dataset <- function(x, ...) {
 #' print(dtrain, verbose = TRUE)
 #' }
 #' @rdname dimnames.lgb.Dataset
+#' @return A list with the dimension names of the dataset
 #' @export
 dimnames.lgb.Dataset <- function(x) {

@@ -883,6 +885,7 @@ dimnames.lgb.Dataset <- function(x) {
 }

 #' @rdname dimnames.lgb.Dataset
+#' @return A list with the dimension names of the dataset
 #' @export
 `dimnames<-.lgb.Dataset` <- function(x, value) {

@@ -929,7 +932,7 @@ dimnames.lgb.Dataset <- function(x) {
 #' @return constructed sub dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -944,6 +947,7 @@ slice <- function(dataset, ...) {
 }

 #' @rdname slice
+#' @return constructed sub dataset
 #' @export
 slice.lgb.Dataset <- function(dataset, idxset, ...) {

@@ -976,7 +980,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -994,6 +998,7 @@ getinfo <- function(dataset, ...) {
 }

 #' @rdname getinfo
+#' @return info data
 #' @export
 getinfo.lgb.Dataset <- function(dataset, name, ...) {

@@ -1013,7 +1018,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
 #' @param name the name of the field to get
 #' @param info the specific field of information to set
 #' @param ... other parameters
-#' @return passed object
+#' @return the dataset you passed in
 #'
 #' @details
 #' The \code{name} field can be one of the following:
@@ -1029,7 +1034,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1047,6 +1052,7 @@ setinfo <- function(dataset, ...) {
 }

 #' @rdname setinfo
+#' @return the dataset you passed in
 #' @export
 setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

@@ -1066,10 +1072,10 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
 #' @param categorical_feature categorical features. This can either be a character vector of feature
 #'                            names or an integer vector with the indices of the features (e.g.
 #'                            \code{c(1L, 10L)} to say "the first and tenth columns").
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1097,10 +1103,10 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
 #' @param dataset object of class \code{lgb.Dataset}
 #' @param reference object of class \code{lgb.Dataset}
 #'
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package ="lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1129,10 +1135,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
 #' @param dataset object of class \code{lgb.Dataset}
 #' @param fname object filename of output file
 #'
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lgb.convert_with_rules.R
+++ b/R-package/R/lgb.convert_with_rules.R
@@ -68,7 +68,7 @@
 #'         \code{lgb.Dataset}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(iris)
 #'
 #' str(iris)

--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -52,7 +52,7 @@ CVBooster <- R6::R6Class(
 #' @return a trained model \code{lgb.CVBooster}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -466,7 +466,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {

    # When doing group, stratified is not possible (only random selection)
    if (nfold > length(group)) {
-      stop("\n\tYou requested too many folds for the number of available groups.\n")
+      stop("\nYou requested too many folds for the number of available groups.\n")
    }

    # Degroup the groups

--- a/R-package/R/lgb.importance.R
+++ b/R-package/R/lgb.importance.R
@@ -13,7 +13,7 @@
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lgb.interprete.R
+++ b/R-package/R/lgb.interprete.R
@@ -16,7 +16,7 @@
 #'         Contribution columns to each class.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' Logit <- function(x) log(x / (1.0 - x))
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train

--- a/R-package/R/lgb.model.dt.tree.R
+++ b/R-package/R/lgb.model.dt.tree.R
@@ -28,7 +28,7 @@
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lgb.plot.importance.R
+++ b/R-package/R/lgb.plot.importance.R
@@ -18,7 +18,7 @@
 #' and silently returns a processed data.table with \code{top_n} features sorted by defined importance.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lgb.plot.interpretation.R
+++ b/R-package/R/lgb.plot.interpretation.R
@@ -15,7 +15,7 @@
 #' The \code{lgb.plot.interpretation} function creates a \code{barplot}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' Logit <- function(x) {
 #'   log(x / (1.0 - x))
 #' }

--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -26,7 +26,7 @@
 #' @return a trained booster model \code{lgb.Booster}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lgb.unloader.R
+++ b/R-package/R/lgb.unloader.R
@@ -14,7 +14,7 @@
 #' @return NULL invisibly.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

--- a/R-package/R/lightgbm.R
+++ b/R-package/R/lightgbm.R
@@ -97,6 +97,7 @@ NULL
 #'                             CPU using hyper-threading to generate 2 threads per CPU core).}
 #'     }
 #' @inheritSection lgb_shared_params Early Stopping
+#' @return a trained \code{lgb.Booster}
 #' @export
 lightgbm <- function(data,
                     label = NULL,

--- a/R-package/R/readRDS.lgb.Booster.R
+++ b/R-package/R/readRDS.lgb.Booster.R
@@ -4,10 +4,10 @@
 #' @param file a connection or the name of the file where the R object is saved to or read from.
 #' @param refhook a hook function for handling reference objects.
 #'
-#' @return \code{lgb.Booster}.
+#' @return \code{lgb.Booster}
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(lightgbm)
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train