efficient_many_training.R 1.32 KB
Newer Older
1
2
3
4
# Efficient training means training without giving up too much RAM
# In the case of many trainings (like 100+ models), RAM will be eaten very quickly
# Therefore, it is essential to know a strategy to deal with such issue

5
# More results can be found here: https://github.com/microsoft/LightGBM/issues/879#issuecomment-326656580
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Quote: "@Laurae2 Thanks for nice easily reproducible example (unlike mine).
# With reset=FALSE you get after 500 iterations (not 1000): OS reports 27GB usage, while R gc() reports 1.5GB.
# Just doing reset=TRUE will already improve things: OS reports 4.6GB.
# Doing reset=TRUE and calling gc() in the loop will have OS 1.3GB. Thanks for the latest tip."

# Load library
library(lightgbm)

# Generate fictive data of size 1M x 100
set.seed(11111)
x_data <- matrix(rnorm(n = 100000000, mean = 0, sd = 100), nrow = 1000000, ncol = 100)
y_data <- rnorm(n = 1000000, mean = 0, sd = 5)

# Create lgb.Dataset for training
data <- lgb.Dataset(x_data, label = y_data)
data$construct()

# Loop through a training of 1000 models, please check your RAM on your task manager
# It MUST remain constant (if not increasing very slightly)
gbm <- list()

for (i in 1:1000) {
  print(i)
29
30
31
32
33
34
  gbm[[i]] <- lgb.train(
      params = list(objective = "regression")
      , data = data
      , 1
      , reset_data = TRUE
  )
35
36
  gc(verbose = FALSE)
}