utils.R 7.32 KB
Newer Older
1
.is_Booster <- function(x) {
2
  return(all(c("R6", "lgb.Booster") %in% class(x)))  # nolint: class_equals
3
}
4

5
.is_Dataset <- function(x) {
6
  return(all(c("R6", "lgb.Dataset") %in% class(x)))  # nolint: class_equals
7
8
}

9
.is_Predictor <- function(x) {
10
  return(all(c("R6", "lgb.Predictor") %in% class(x)))  # nolint: class_equals
11
}
12

13
.is_null_handle <- function(x) {
14
15
16
17
18
19
  if (is.null(x)) {
    return(TRUE)
  }
  return(
    isTRUE(.Call(LGBM_HandleIsNull_R, x))
  )
20
}
Guolin Ke's avatar
Guolin Ke committed
21

22
.params2str <- function(params) {
23

24
  if (!identical(class(params), "list")) {
25
26
    stop("params must be a list")
  }
27

28
  names(params) <- gsub(".", "_", names(params), fixed = TRUE)
29
  param_names <- names(params)
30
  ret <- list()
31

32
  # Perform key value join
33
  for (i in seq_along(params)) {
34

35
36
37
38
39
    # If a parameter has multiple values, join those values together with commas.
    # trimws() is necessary because format() will pad to make strings the same width
    val <- paste0(
      trimws(
        format(
40
          x = unname(params[[i]])
41
42
43
44
45
          , scientific = FALSE
        )
      )
      , collapse = ","
    )
46
    if (nchar(val) <= 0L) next # Skip join
47

48
    # Join key value
49
    pair <- paste0(c(param_names[[i]], val), collapse = "=")
50
    ret <- c(ret, pair)
51

Guolin Ke's avatar
Guolin Ke committed
52
  }
53

54
  if (length(ret) == 0L) {
55
    return("")
Guolin Ke's avatar
Guolin Ke committed
56
  }
57

58
  return(paste0(ret, collapse = " "))
59

Guolin Ke's avatar
Guolin Ke committed
60
61
}

62
.check_interaction_constraints <- function(interaction_constraints, column_names) {
63
64
65
66

  # Convert interaction constraints to feature numbers
  string_constraints <- list()

67
  if (!is.null(interaction_constraints)) {
68

69
    if (!methods::is(interaction_constraints, "list")) {
70
71
        stop("interaction_constraints must be a list")
    }
72
73
74
75
76
77
78
    constraint_is_character_or_numeric <- sapply(
        X = interaction_constraints
        , FUN = function(x) {
            return(is.character(x) || is.numeric(x))
        }
    )
    if (!all(constraint_is_character_or_numeric)) {
79
80
81
        stop("every element in interaction_constraints must be a character vector or numeric vector")
    }

82
    for (constraint in interaction_constraints) {
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

      # Check for character name
      if (is.character(constraint)) {

          constraint_indices <- as.integer(match(constraint, column_names) - 1L)

          # Provided indices, but some indices are not existing?
          if (sum(is.na(constraint_indices)) > 0L) {
            stop(
              "supplied an unknown feature in interaction_constraints "
              , sQuote(constraint[is.na(constraint_indices)])
            )
          }

        } else {

          # Check that constraint indices are at most number of features
          if (max(constraint) > length(column_names)) {
            stop(
              "supplied a too large value in interaction_constraints: "
              , max(constraint)
              , " but only "
              , length(column_names)
              , " features"
            )
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          constraint_indices <- as.integer(constraint - 1L)

        }

        # Convert constraint to string
        constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
        string_constraints <- append(string_constraints, constraint_string)
    }

  }

  return(string_constraints)

}

Guolin Ke's avatar
Guolin Ke committed
126

127
# [description]
128
129
130
131
#     Take any character values from eval and store them in params$metric.
#     This has to account for the fact that `eval` could be a character vector,
#     a function, a list of functions, or a list with a mix of strings and
#     functions
132
.check_eval <- function(params, eval) {
133

134
135
  if (is.null(params$metric)) {
    params$metric <- list()
136
137
  } else if (is.character(params$metric)) {
    params$metric <- as.list(params$metric)
138
  }
139

140
141
142
143
144
145
146
147
148
  # if 'eval' is a character vector or list, find the character
  # elements and add them to 'metric'
  if (!is.function(eval)) {
    for (i in seq_along(eval)) {
      element <- eval[[i]]
      if (is.character(element)) {
        params$metric <- append(params$metric, element)
      }
    }
149
  }
150

151
152
153
154
155
156
157
158
159
  # If more than one character metric was given, then "None" should
  # not be included
  if (length(params$metric) > 1L) {
    params$metric <- Filter(
        f = function(metric) {
          !(metric %in% .NO_METRIC_STRINGS())
        }
        , x = params$metric
    )
160
161
  }

162
163
164
  # duplicate metrics should be filtered out
  params$metric <- as.list(unique(unlist(params$metric)))

165
  return(params)
Guolin Ke's avatar
Guolin Ke committed
166
}
167
168
169
170
171
172
173
174
175
176
177
178
179


# [description]
#
#     Resolve differences between passed-in keyword arguments, parameters,
#     and parameter aliases. This function exists because some functions in the
#     package take in parameters through their own keyword arguments other than
#     the `params` list.
#
#     If the same underlying parameter is provided multiple
#     ways, the first item in this list is used:
#
#         1. the main (non-alias) parameter found in `params`
180
#         2. the alias with the highest priority found in `params`
181
182
183
184
185
186
187
#         3. the keyword argument passed in
#
#     For example, "num_iterations" can also be provided to lgb.train()
#     via keyword "nrounds". lgb.train() will choose one value for this parameter
#     based on the first match in this list:
#
#         1. params[["num_iterations]]
188
#         2. the highest priority alias of "num_iterations" found in params
189
190
191
192
193
194
195
196
#         3. the nrounds keyword argument
#
#     If multiple aliases are found in `params` for the same parameter, they are
#     all removed before returning `params`.
#
# [return]
#     params with num_iterations set to the chosen value, and other aliases
#     of num_iterations removed
197
.check_wrapper_param <- function(main_param_name, params, alternative_kwarg_value) {
198
199

  aliases <- .PARAMETER_ALIASES()[[main_param_name]]
200
  aliases_provided <- aliases[aliases %in% names(params)]
201
202
203
204
205
206
207
208
209
210
  aliases_provided <- aliases_provided[aliases_provided != main_param_name]

  # prefer the main parameter
  if (!is.null(params[[main_param_name]])) {
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

211
  # if the main parameter wasn't provided, prefer the first alias
212
213
214
215
216
217
218
219
220
221
222
223
224
225
  if (length(aliases_provided) > 0L) {
    first_param <- aliases_provided[1L]
    params[[main_param_name]] <- params[[first_param]]
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

  # if not provided in params at all, use the alternative value provided
  # through a keyword argument from lgb.train(), lgb.cv(), etc.
  params[[main_param_name]] <- alternative_kwarg_value
  return(params)
}
226
227

#' @importFrom parallel detectCores
228
.get_default_num_threads <- function() {
229
  if (requireNamespace("RhpcBLASctl", quietly = TRUE)) {  # nolint: undesirable_function
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
    return(RhpcBLASctl::get_num_cores())
  } else {
    msg <- "Optional package 'RhpcBLASctl' not found."
    cores <- 0L
    if (Sys.info()["sysname"] != "Linux") {
      cores <- parallel::detectCores(logical = FALSE)
      if (is.na(cores) || cores < 0L) {
        cores <- 0L
      }
    }
    if (cores == 0L) {
      msg <- paste(msg, "Will use default number of OpenMP threads.", sep = " ")
    } else {
      msg <- paste(msg, "Detection of CPU cores might not be accurate.", sep = " ")
    }
    warning(msg)
    return(cores)
  }
}
249

250
.equal_or_both_null <- function(a, b) {
251
252
253
254
255
256
257
258
259
260
261
262
  if (is.null(a)) {
    if (!is.null(b)) {
      return(FALSE)
    }
    return(TRUE)
  } else {
    if (is.null(b)) {
      return(FALSE)
    }
    return(a == b)
  }
}