utils.R 8.79 KB
Newer Older
1
lgb.is.Booster <- function(x) {
2
  return(lgb.check.r6.class(object = x, name = "lgb.Booster"))
3
}
4

5
lgb.is.Dataset <- function(x) {
6
  return(lgb.check.r6.class(object = x, name = "lgb.Dataset"))
7
}
8

Guolin Ke's avatar
Guolin Ke committed
9
10
11
12
13
14
15
16
lgb.null.handle <- function() {
  if (.Machine$sizeof.pointer == 8L) {
    return(NA_real_)
  } else {
    return(NA_integer_)
  }
}

17
lgb.is.null.handle <- function(x) {
18
  return(is.null(x) || is.na(x))
19
}
Guolin Ke's avatar
Guolin Ke committed
20
21

lgb.encode.char <- function(arr, len) {
22
  if (!is.raw(arr)) {
23
    stop("lgb.encode.char: Can only encode from raw type")
Guolin Ke's avatar
Guolin Ke committed
24
  }
25
  return(rawToChar(arr[seq_len(len)]))
Guolin Ke's avatar
Guolin Ke committed
26
27
}

28
29
# [description] Get the most recent error stored on the C++ side and raise it
#               as an R error.
30
31
lgb.last_error <- function() {
  err_msg <- .Call(
32
    LGBM_GetLastError_R
33
  )
34
  stop("api error: ", err_msg)
35
  return(invisible(NULL))
Guolin Ke's avatar
Guolin Ke committed
36
37
}
lgb.params2str <- function(params, ...) {
38

39
  # Check for a list as input
40
  if (!identical(class(params), "list")) {
41
42
    stop("params must be a list")
  }
43

44
  # Split parameter names
Guolin Ke's avatar
Guolin Ke committed
45
  names(params) <- gsub("\\.", "_", names(params))
46

47
  # Merge parameters from the params and the dots-expansion
Guolin Ke's avatar
Guolin Ke committed
48
49
  dot_params <- list(...)
  names(dot_params) <- gsub("\\.", "_", names(dot_params))
50

51
  # Check for identical parameters
52
  if (length(intersect(names(params), names(dot_params))) > 0L) {
53
54
55
56
57
58
59
    stop(
      "Same parameters in "
      , sQuote("params")
      , " and in the call are not allowed. Please check your "
      , sQuote("params")
      , " list"
    )
60
  }
61

62
  # Merge parameters
Guolin Ke's avatar
Guolin Ke committed
63
  params <- c(params, dot_params)
64

65
66
  # Setup temporary variable
  ret <- list()
67

68
  # Perform key value join
Guolin Ke's avatar
Guolin Ke committed
69
  for (key in names(params)) {
70

71
72
73
74
75
76
77
78
79
80
81
    # If a parameter has multiple values, join those values together with commas.
    # trimws() is necessary because format() will pad to make strings the same width
    val <- paste0(
      trimws(
        format(
          x = params[[key]]
          , scientific = FALSE
        )
      )
      , collapse = ","
    )
82
    if (nchar(val) <= 0L) next # Skip join
83

84
    # Join key value
Guolin Ke's avatar
Guolin Ke committed
85
    pair <- paste0(c(key, val), collapse = "=")
86
    ret <- c(ret, pair)
87

Guolin Ke's avatar
Guolin Ke committed
88
  }
89

90
  # Check ret length
91
  if (length(ret) == 0L) {
92
    return(lgb.c_str(x = ""))
Guolin Ke's avatar
Guolin Ke committed
93
  }
94

95
  return(lgb.c_str(x = paste0(ret, collapse = " ")))
96

Guolin Ke's avatar
Guolin Ke committed
97
98
}

99
lgb.check_interaction_constraints <- function(interaction_constraints, column_names) {
100
101
102
103

  # Convert interaction constraints to feature numbers
  string_constraints <- list()

104
  if (!is.null(interaction_constraints)) {
105

106
    if (!methods::is(interaction_constraints, "list")) {
107
108
        stop("interaction_constraints must be a list")
    }
109
    if (!all(sapply(interaction_constraints, function(x) {is.character(x) || is.numeric(x)}))) {
110
111
112
        stop("every element in interaction_constraints must be a character vector or numeric vector")
    }

113
    for (constraint in interaction_constraints) {
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

      # Check for character name
      if (is.character(constraint)) {

          constraint_indices <- as.integer(match(constraint, column_names) - 1L)

          # Provided indices, but some indices are not existing?
          if (sum(is.na(constraint_indices)) > 0L) {
            stop(
              "supplied an unknown feature in interaction_constraints "
              , sQuote(constraint[is.na(constraint_indices)])
            )
          }

        } else {

          # Check that constraint indices are at most number of features
          if (max(constraint) > length(column_names)) {
            stop(
              "supplied a too large value in interaction_constraints: "
              , max(constraint)
              , " but only "
              , length(column_names)
              , " features"
            )
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          constraint_indices <- as.integer(constraint - 1L)

        }

        # Convert constraint to string
        constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
        string_constraints <- append(string_constraints, constraint_string)
    }

  }

  return(string_constraints)

}

Guolin Ke's avatar
Guolin Ke committed
157
lgb.c_str <- function(x) {
158

Guolin Ke's avatar
Guolin Ke committed
159
  ret <- charToRaw(as.character(x))
160
  ret <- c(ret, as.raw(0L))
161
  return(ret)
162

Guolin Ke's avatar
Guolin Ke committed
163
164
165
}

lgb.check.r6.class <- function(object, name) {
166

167
  # Check for non-existence of R6 class or named class
168
  return(all(c("R6", name) %in% class(object)))
169

Guolin Ke's avatar
Guolin Ke committed
170
171
172
}

lgb.check.obj <- function(params, obj) {
173

174
  # List known objectives in a vector
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  OBJECTIVES <- c(
    "regression"
    , "regression_l1"
    , "regression_l2"
    , "mean_squared_error"
    , "mse"
    , "l2_root"
    , "root_mean_squared_error"
    , "rmse"
    , "mean_absolute_error"
    , "mae"
    , "quantile"
    , "huber"
    , "fair"
    , "poisson"
    , "binary"
    , "lambdarank"
    , "multiclass"
    , "softmax"
    , "multiclassova"
    , "multiclass_ova"
    , "ova"
    , "ovr"
    , "xentropy"
    , "cross_entropy"
    , "xentlambda"
    , "cross_entropy_lambda"
    , "mean_absolute_percentage_error"
    , "mape"
    , "gamma"
    , "tweedie"
206
207
208
209
210
    , "rank_xendcg"
    , "xendcg"
    , "xe_ndcg"
    , "xe_ndcg_mart"
    , "xendcg_mart"
211
  )
212

213
  # Check whether the objective is empty or not, and take it from params if needed
214
215
216
  if (!is.null(obj)) {
    params$objective <- obj
  }
217

218
  # Check whether the objective is a character
219
  if (is.character(params$objective)) {
220

221
    # If the objective is a character, check if it is a known objective
222
    if (!(params$objective %in% OBJECTIVES)) {
223

224
      stop("lgb.check.obj: objective name error should be one of (", paste0(OBJECTIVES, collapse = ", "), ")")
225

Guolin Ke's avatar
Guolin Ke committed
226
    }
227

228
  } else if (!is.function(params$objective)) {
229

230
    stop("lgb.check.obj: objective should be a character or a function")
231

Guolin Ke's avatar
Guolin Ke committed
232
  }
233

234
  return(params)
235

Guolin Ke's avatar
Guolin Ke committed
236
237
}

238
# [description]
239
240
241
242
#     Take any character values from eval and store them in params$metric.
#     This has to account for the fact that `eval` could be a character vector,
#     a function, a list of functions, or a list with a mix of strings and
#     functions
Guolin Ke's avatar
Guolin Ke committed
243
lgb.check.eval <- function(params, eval) {
244

245
246
  if (is.null(params$metric)) {
    params$metric <- list()
247
248
  } else if (is.character(params$metric)) {
    params$metric <- as.list(params$metric)
249
  }
250

251
252
253
254
255
256
257
258
259
  # if 'eval' is a character vector or list, find the character
  # elements and add them to 'metric'
  if (!is.function(eval)) {
    for (i in seq_along(eval)) {
      element <- eval[[i]]
      if (is.character(element)) {
        params$metric <- append(params$metric, element)
      }
    }
260
  }
261

262
263
264
265
266
267
268
269
270
  # If more than one character metric was given, then "None" should
  # not be included
  if (length(params$metric) > 1L) {
    params$metric <- Filter(
        f = function(metric) {
          !(metric %in% .NO_METRIC_STRINGS())
        }
        , x = params$metric
    )
271
272
  }

273
274
275
  # duplicate metrics should be filtered out
  params$metric <- as.list(unique(unlist(params$metric)))

276
  return(params)
Guolin Ke's avatar
Guolin Ke committed
277
}
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336


# [description]
#
#     Resolve differences between passed-in keyword arguments, parameters,
#     and parameter aliases. This function exists because some functions in the
#     package take in parameters through their own keyword arguments other than
#     the `params` list.
#
#     If the same underlying parameter is provided multiple
#     ways, the first item in this list is used:
#
#         1. the main (non-alias) parameter found in `params`
#         2. the first alias of that parameter found in `params`
#         3. the keyword argument passed in
#
#     For example, "num_iterations" can also be provided to lgb.train()
#     via keyword "nrounds". lgb.train() will choose one value for this parameter
#     based on the first match in this list:
#
#         1. params[["num_iterations]]
#         2. the first alias of "num_iterations" found in params
#         3. the nrounds keyword argument
#
#     If multiple aliases are found in `params` for the same parameter, they are
#     all removed before returning `params`.
#
# [return]
#     params with num_iterations set to the chosen value, and other aliases
#     of num_iterations removed
lgb.check.wrapper_param <- function(main_param_name, params, alternative_kwarg_value) {

  aliases <- .PARAMETER_ALIASES()[[main_param_name]]
  aliases_provided <- names(params)[names(params) %in% aliases]
  aliases_provided <- aliases_provided[aliases_provided != main_param_name]

  # prefer the main parameter
  if (!is.null(params[[main_param_name]])) {
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

  # if the main parameter wasn't proovided, prefer the first alias
  if (length(aliases_provided) > 0L) {
    first_param <- aliases_provided[1L]
    params[[main_param_name]] <- params[[first_param]]
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

  # if not provided in params at all, use the alternative value provided
  # through a keyword argument from lgb.train(), lgb.cv(), etc.
  params[[main_param_name]] <- alternative_kwarg_value
  return(params)
}