utils.R 9.2 KB
Newer Older
1
lgb.is.Booster <- function(x) {
2
  return(lgb.check.r6.class(object = x, name = "lgb.Booster"))
3
}
4

5
lgb.is.Dataset <- function(x) {
6
  return(lgb.check.r6.class(object = x, name = "lgb.Dataset"))
7
}
8

Guolin Ke's avatar
Guolin Ke committed
9
10
11
12
13
14
15
16
lgb.null.handle <- function() {
  if (.Machine$sizeof.pointer == 8L) {
    return(NA_real_)
  } else {
    return(NA_integer_)
  }
}

17
lgb.is.null.handle <- function(x) {
18
  return(is.null(x) || is.na(x))
19
}
Guolin Ke's avatar
Guolin Ke committed
20
21

lgb.encode.char <- function(arr, len) {
22
  if (!is.raw(arr)) {
23
    stop("lgb.encode.char: Can only encode from raw type")
Guolin Ke's avatar
Guolin Ke committed
24
  }
25
  return(rawToChar(arr[seq_len(len)]))
Guolin Ke's avatar
Guolin Ke committed
26
27
}

28
29
# [description] Raise an error. Before raising that error, check for any error message
#               stored in a buffer on the C++ side.
30
31
32
33
34
35
lgb.last_error <- function() {
  # Perform text error buffering
  buf_len <- 200L
  act_len <- 0L
  err_msg <- raw(buf_len)
  err_msg <- .Call(
36
    LGBM_GetLastError_R
37
38
39
40
41
42
43
44
45
46
    , buf_len
    , act_len
    , err_msg
  )

  # Check error buffer
  if (act_len > buf_len) {
    buf_len <- act_len
    err_msg <- raw(buf_len)
    err_msg <- .Call(
47
      LGBM_GetLastError_R
48
49
50
51
52
53
      , buf_len
      , act_len
      , err_msg
    )
  }

54
  stop("api error: ", lgb.encode.char(arr = err_msg, len = act_len))
55
56

  return(invisible(NULL))
57

Guolin Ke's avatar
Guolin Ke committed
58
59
60
}

lgb.params2str <- function(params, ...) {
61

62
  # Check for a list as input
63
  if (!identical(class(params), "list")) {
64
65
    stop("params must be a list")
  }
66

67
  # Split parameter names
Guolin Ke's avatar
Guolin Ke committed
68
  names(params) <- gsub("\\.", "_", names(params))
69

70
  # Merge parameters from the params and the dots-expansion
Guolin Ke's avatar
Guolin Ke committed
71
72
  dot_params <- list(...)
  names(dot_params) <- gsub("\\.", "_", names(dot_params))
73

74
  # Check for identical parameters
75
  if (length(intersect(names(params), names(dot_params))) > 0L) {
76
77
78
79
80
81
82
    stop(
      "Same parameters in "
      , sQuote("params")
      , " and in the call are not allowed. Please check your "
      , sQuote("params")
      , " list"
    )
83
  }
84

85
  # Merge parameters
Guolin Ke's avatar
Guolin Ke committed
86
  params <- c(params, dot_params)
87

88
89
  # Setup temporary variable
  ret <- list()
90

91
  # Perform key value join
Guolin Ke's avatar
Guolin Ke committed
92
  for (key in names(params)) {
93

94
95
96
97
98
99
100
101
102
103
104
    # If a parameter has multiple values, join those values together with commas.
    # trimws() is necessary because format() will pad to make strings the same width
    val <- paste0(
      trimws(
        format(
          x = params[[key]]
          , scientific = FALSE
        )
      )
      , collapse = ","
    )
105
    if (nchar(val) <= 0L) next # Skip join
106

107
    # Join key value
Guolin Ke's avatar
Guolin Ke committed
108
    pair <- paste0(c(key, val), collapse = "=")
109
    ret <- c(ret, pair)
110

Guolin Ke's avatar
Guolin Ke committed
111
  }
112

113
  # Check ret length
114
  if (length(ret) == 0L) {
115
    return(lgb.c_str(x = ""))
Guolin Ke's avatar
Guolin Ke committed
116
  }
117

118
  return(lgb.c_str(x = paste0(ret, collapse = " ")))
119

Guolin Ke's avatar
Guolin Ke committed
120
121
}

122
lgb.check_interaction_constraints <- function(interaction_constraints, column_names) {
123
124
125
126

  # Convert interaction constraints to feature numbers
  string_constraints <- list()

127
  if (!is.null(interaction_constraints)) {
128

129
    if (!methods::is(interaction_constraints, "list")) {
130
131
        stop("interaction_constraints must be a list")
    }
132
    if (!all(sapply(interaction_constraints, function(x) {is.character(x) || is.numeric(x)}))) {
133
134
135
        stop("every element in interaction_constraints must be a character vector or numeric vector")
    }

136
    for (constraint in interaction_constraints) {
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

      # Check for character name
      if (is.character(constraint)) {

          constraint_indices <- as.integer(match(constraint, column_names) - 1L)

          # Provided indices, but some indices are not existing?
          if (sum(is.na(constraint_indices)) > 0L) {
            stop(
              "supplied an unknown feature in interaction_constraints "
              , sQuote(constraint[is.na(constraint_indices)])
            )
          }

        } else {

          # Check that constraint indices are at most number of features
          if (max(constraint) > length(column_names)) {
            stop(
              "supplied a too large value in interaction_constraints: "
              , max(constraint)
              , " but only "
              , length(column_names)
              , " features"
            )
          }

          # Store indices as [0, n-1] indexed instead of [1, n] indexed
          constraint_indices <- as.integer(constraint - 1L)

        }

        # Convert constraint to string
        constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
        string_constraints <- append(string_constraints, constraint_string)
    }

  }

  return(string_constraints)

}

Guolin Ke's avatar
Guolin Ke committed
180
lgb.c_str <- function(x) {
181

Guolin Ke's avatar
Guolin Ke committed
182
  ret <- charToRaw(as.character(x))
183
  ret <- c(ret, as.raw(0L))
184
  return(ret)
185

Guolin Ke's avatar
Guolin Ke committed
186
187
188
}

lgb.check.r6.class <- function(object, name) {
189

190
  # Check for non-existence of R6 class or named class
191
  return(all(c("R6", name) %in% class(object)))
192

Guolin Ke's avatar
Guolin Ke committed
193
194
195
}

lgb.check.obj <- function(params, obj) {
196

197
  # List known objectives in a vector
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
  OBJECTIVES <- c(
    "regression"
    , "regression_l1"
    , "regression_l2"
    , "mean_squared_error"
    , "mse"
    , "l2_root"
    , "root_mean_squared_error"
    , "rmse"
    , "mean_absolute_error"
    , "mae"
    , "quantile"
    , "huber"
    , "fair"
    , "poisson"
    , "binary"
    , "lambdarank"
    , "multiclass"
    , "softmax"
    , "multiclassova"
    , "multiclass_ova"
    , "ova"
    , "ovr"
    , "xentropy"
    , "cross_entropy"
    , "xentlambda"
    , "cross_entropy_lambda"
    , "mean_absolute_percentage_error"
    , "mape"
    , "gamma"
    , "tweedie"
229
230
231
232
233
    , "rank_xendcg"
    , "xendcg"
    , "xe_ndcg"
    , "xe_ndcg_mart"
    , "xendcg_mart"
234
  )
235

236
  # Check whether the objective is empty or not, and take it from params if needed
237
238
239
  if (!is.null(obj)) {
    params$objective <- obj
  }
240

241
  # Check whether the objective is a character
242
  if (is.character(params$objective)) {
243

244
    # If the objective is a character, check if it is a known objective
245
    if (!(params$objective %in% OBJECTIVES)) {
246

247
      stop("lgb.check.obj: objective name error should be one of (", paste0(OBJECTIVES, collapse = ", "), ")")
248

Guolin Ke's avatar
Guolin Ke committed
249
    }
250

251
  } else if (!is.function(params$objective)) {
252

253
    stop("lgb.check.obj: objective should be a character or a function")
254

Guolin Ke's avatar
Guolin Ke committed
255
  }
256

257
  return(params)
258

Guolin Ke's avatar
Guolin Ke committed
259
260
}

261
# [description]
262
263
264
265
#     Take any character values from eval and store them in params$metric.
#     This has to account for the fact that `eval` could be a character vector,
#     a function, a list of functions, or a list with a mix of strings and
#     functions
Guolin Ke's avatar
Guolin Ke committed
266
lgb.check.eval <- function(params, eval) {
267

268
269
  if (is.null(params$metric)) {
    params$metric <- list()
270
271
  } else if (is.character(params$metric)) {
    params$metric <- as.list(params$metric)
272
  }
273

274
275
276
277
278
279
280
281
282
  # if 'eval' is a character vector or list, find the character
  # elements and add them to 'metric'
  if (!is.function(eval)) {
    for (i in seq_along(eval)) {
      element <- eval[[i]]
      if (is.character(element)) {
        params$metric <- append(params$metric, element)
      }
    }
283
  }
284

285
286
287
288
289
290
291
292
293
  # If more than one character metric was given, then "None" should
  # not be included
  if (length(params$metric) > 1L) {
    params$metric <- Filter(
        f = function(metric) {
          !(metric %in% .NO_METRIC_STRINGS())
        }
        , x = params$metric
    )
294
295
  }

296
297
298
  # duplicate metrics should be filtered out
  params$metric <- as.list(unique(unlist(params$metric)))

299
  return(params)
Guolin Ke's avatar
Guolin Ke committed
300
}
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359


# [description]
#
#     Resolve differences between passed-in keyword arguments, parameters,
#     and parameter aliases. This function exists because some functions in the
#     package take in parameters through their own keyword arguments other than
#     the `params` list.
#
#     If the same underlying parameter is provided multiple
#     ways, the first item in this list is used:
#
#         1. the main (non-alias) parameter found in `params`
#         2. the first alias of that parameter found in `params`
#         3. the keyword argument passed in
#
#     For example, "num_iterations" can also be provided to lgb.train()
#     via keyword "nrounds". lgb.train() will choose one value for this parameter
#     based on the first match in this list:
#
#         1. params[["num_iterations]]
#         2. the first alias of "num_iterations" found in params
#         3. the nrounds keyword argument
#
#     If multiple aliases are found in `params` for the same parameter, they are
#     all removed before returning `params`.
#
# [return]
#     params with num_iterations set to the chosen value, and other aliases
#     of num_iterations removed
lgb.check.wrapper_param <- function(main_param_name, params, alternative_kwarg_value) {

  aliases <- .PARAMETER_ALIASES()[[main_param_name]]
  aliases_provided <- names(params)[names(params) %in% aliases]
  aliases_provided <- aliases_provided[aliases_provided != main_param_name]

  # prefer the main parameter
  if (!is.null(params[[main_param_name]])) {
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

  # if the main parameter wasn't proovided, prefer the first alias
  if (length(aliases_provided) > 0L) {
    first_param <- aliases_provided[1L]
    params[[main_param_name]] <- params[[first_param]]
    for (param in aliases_provided) {
      params[[param]] <- NULL
    }
    return(params)
  }

  # if not provided in params at all, use the alternative value provided
  # through a keyword argument from lgb.train(), lgb.cv(), etc.
  params[[main_param_name]] <- alternative_kwarg_value
  return(params)
}