Unverified Commit 53ffba7c authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] move more finalizer logic into C++ side to address memory leaks (#4353)



* [R-package] move more finalizer logic intoo C++ side

* add C finalizers

* use gc()

* put skip() back

* Update R-package/tests/testthat/test_lgb.Booster.R
Co-authored-by: default avatarNikita Titov <nekit94-08@mail.ru>
Co-authored-by: default avatarNikita Titov <nekit94-08@mail.ru>
parent f0bca1a2
...@@ -11,21 +11,12 @@ Booster <- R6::R6Class( ...@@ -11,21 +11,12 @@ Booster <- R6::R6Class(
# Finalize will free up the handles # Finalize will free up the handles
finalize = function() { finalize = function() {
.Call(
# Check the need for freeing handle LGBM_BoosterFree_R
if (!lgb.is.null.handle(x = private$handle)) { , private$handle
)
# Freeing up handle private$handle <- NULL
.Call(
LGBM_BoosterFree_R
, private$handle
)
private$handle <- NULL
}
return(invisible(NULL)) return(invisible(NULL))
}, },
# Initialize will create a starter booster # Initialize will create a starter booster
......
...@@ -8,21 +8,12 @@ Dataset <- R6::R6Class( ...@@ -8,21 +8,12 @@ Dataset <- R6::R6Class(
# Finalize will free up the handles # Finalize will free up the handles
finalize = function() { finalize = function() {
.Call(
# Check the need for freeing handle LGBM_DatasetFree_R
if (!lgb.is.null.handle(x = private$handle)) { , private$handle
)
# Freeing up handle private$handle <- NULL
.Call(
LGBM_DatasetFree_R
, private$handle
)
private$handle <- NULL
}
return(invisible(NULL)) return(invisible(NULL))
}, },
# Initialize will create a starter dataset # Initialize will create a starter dataset
......
...@@ -11,9 +11,8 @@ Predictor <- R6::R6Class( ...@@ -11,9 +11,8 @@ Predictor <- R6::R6Class(
finalize = function() { finalize = function() {
# Check the need for freeing handle # Check the need for freeing handle
if (private$need_free_handle && !lgb.is.null.handle(x = private$handle)) { if (private$need_free_handle) {
# Freeing up handle
.Call( .Call(
LGBM_BoosterFree_R LGBM_BoosterFree_R
, private$handle , private$handle
......
...@@ -46,6 +46,10 @@ SEXP LGBM_HandleIsNull_R(SEXP handle) { ...@@ -46,6 +46,10 @@ SEXP LGBM_HandleIsNull_R(SEXP handle) {
return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == NULL); return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
} }
void _DatasetFinalizer(SEXP handle) {
LGBM_DatasetFree_R(handle);
}
SEXP LGBM_DatasetCreateFromFile_R(SEXP filename, SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters, SEXP parameters,
SEXP reference) { SEXP reference) {
...@@ -59,6 +63,7 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename, ...@@ -59,6 +63,7 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
CHECK_CALL(LGBM_DatasetCreateFromFile(CHAR(Rf_asChar(filename)), CHAR(Rf_asChar(parameters)), CHECK_CALL(LGBM_DatasetCreateFromFile(CHAR(Rf_asChar(filename)), CHAR(Rf_asChar(parameters)),
ref, &handle)); ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -90,6 +95,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr, ...@@ -90,6 +95,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata, p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, CHAR(Rf_asChar(parameters)), ref, &handle)); nrow, CHAR(Rf_asChar(parameters)), ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -113,6 +119,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data, ...@@ -113,6 +119,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
CHECK_CALL(LGBM_DatasetCreateFromMat(p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR, CHECK_CALL(LGBM_DatasetCreateFromMat(p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
CHAR(Rf_asChar(parameters)), ref, &handle)); CHAR(Rf_asChar(parameters)), ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -136,6 +143,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle, ...@@ -136,6 +143,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
idxvec.data(), len, CHAR(Rf_asChar(parameters)), idxvec.data(), len, CHAR(Rf_asChar(parameters)),
&res)); &res));
ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -211,7 +219,7 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle, ...@@ -211,7 +219,7 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
SEXP LGBM_DatasetFree_R(SEXP handle) { SEXP LGBM_DatasetFree_R(SEXP handle) {
R_API_BEGIN(); R_API_BEGIN();
if (R_ExternalPtrAddr(handle)) { if (!Rf_isNull(handle) && R_ExternalPtrAddr(handle)) {
CHECK_CALL(LGBM_DatasetFree(R_ExternalPtrAddr(handle))); CHECK_CALL(LGBM_DatasetFree(R_ExternalPtrAddr(handle)));
R_ClearExternalPtr(handle); R_ClearExternalPtr(handle);
} }
...@@ -320,9 +328,13 @@ SEXP LGBM_DatasetGetNumFeature_R(SEXP handle, ...@@ -320,9 +328,13 @@ SEXP LGBM_DatasetGetNumFeature_R(SEXP handle,
// --- start Booster interfaces // --- start Booster interfaces
void _BoosterFinalizer(SEXP handle) {
LGBM_BoosterFree_R(handle);
}
SEXP LGBM_BoosterFree_R(SEXP handle) { SEXP LGBM_BoosterFree_R(SEXP handle) {
R_API_BEGIN(); R_API_BEGIN();
if (R_ExternalPtrAddr(handle)) { if (!Rf_isNull(handle) && R_ExternalPtrAddr(handle)) {
CHECK_CALL(LGBM_BoosterFree(R_ExternalPtrAddr(handle))); CHECK_CALL(LGBM_BoosterFree(R_ExternalPtrAddr(handle)));
R_ClearExternalPtr(handle); R_ClearExternalPtr(handle);
} }
...@@ -336,6 +348,7 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data, ...@@ -336,6 +348,7 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
BoosterHandle handle = nullptr; BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), CHAR(Rf_asChar(parameters)), &handle)); CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), CHAR(Rf_asChar(parameters)), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -348,6 +361,7 @@ SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) { ...@@ -348,6 +361,7 @@ SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
BoosterHandle handle = nullptr; BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreateFromModelfile(CHAR(Rf_asChar(filename)), &out_num_iterations, &handle)); CHECK_CALL(LGBM_BoosterCreateFromModelfile(CHAR(Rf_asChar(filename)), &out_num_iterations, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
...@@ -360,6 +374,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) { ...@@ -360,6 +374,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
BoosterHandle handle = nullptr; BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterLoadModelFromString(CHAR(Rf_asChar(model_str)), &out_num_iterations, &handle)); CHECK_CALL(LGBM_BoosterLoadModelFromString(CHAR(Rf_asChar(model_str)), &out_num_iterations, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1); UNPROTECT(1);
return ret; return ret;
R_API_END(); R_API_END();
......
context("Predictor") context("Predictor")
test_that("Predictor$finalize() should not fail", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]]
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, verbose = -1L
, nrounds = 3L
)
model_file <- tempfile(fileext = ".model")
bst$save_model(filename = model_file)
predictor <- Predictor$new(modelfile = model_file)
expect_true(lgb.is.Predictor(predictor))
expect_false(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))
predictor$finalize()
expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))
# calling finalize() a second time shouldn't cause any issues
predictor$finalize()
expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))
})
test_that("predictions do not fail for integer input", { test_that("predictions do not fail for integer input", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L) X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]] y <- iris[["Sepal.Length"]]
......
...@@ -215,6 +215,24 @@ test_that("Dataset$update_params() works correctly for recognized Dataset parame ...@@ -215,6 +215,24 @@ test_that("Dataset$update_params() works correctly for recognized Dataset parame
} }
}) })
test_that("Dataset$finalize() should not fail on an already-finalized Dataset", {
dtest <- lgb.Dataset(
data = test_data
, label = test_label
)
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))
dtest$construct()
expect_false(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))
dtest$finalize()
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))
# calling finalize() a second time shouldn't cause any issues
dtest$finalize()
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))
})
test_that("lgb.Dataset: should be able to run lgb.train() immediately after using lgb.Dataset() on a file", { test_that("lgb.Dataset: should be able to run lgb.train() immediately after using lgb.Dataset() on a file", {
dtest <- lgb.Dataset( dtest <- lgb.Dataset(
data = test_data data = test_data
......
context("Booster")
test_that("Booster$finalize() should not fail", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]]
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, verbose = -1L
, nrounds = 3L
)
expect_true(lgb.is.Booster(bst))
expect_false(lgb.is.null.handle(bst$.__enclos_env__$private$handle))
bst$finalize()
expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle))
# calling finalize() a second time shouldn't cause any issues
bst$finalize()
expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle))
})
context("lgb.get.eval.result") context("lgb.get.eval.result")
test_that("lgb.get.eval.result() should throw an informative error if booster is not an lgb.Booster", { test_that("lgb.get.eval.result() should throw an informative error if booster is not an lgb.Booster", {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment