"tests/vscode:/vscode.git/clone" did not exist on "f5afaba162ac4f47e252618367ade80c0dd82b30"
Unverified Commit 1548b42b authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] [c++] add tighter multithreading control, avoid global OpenMP side...

[R-package] [c++] add tighter multithreading control, avoid global OpenMP side effects (fixes #4705, fixes #5102) (#6226)
parent e7979852
......@@ -23,6 +23,8 @@ Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} o
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
......
......@@ -46,6 +46,8 @@ Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable bef
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/multithreading.R
\name{setLGBMThreads}
\alias{setLGBMThreads}
\alias{setLGBMthreads}
\title{Set maximum number of threads used by LightGBM}
\usage{
setLGBMthreads(num_threads)
}
\arguments{
\item{num_threads}{maximum number of threads to be used by LightGBM in multi-threaded operations}
}
\description{
LightGBM attempts to speed up many operations by using multi-threading.
The number of threads used in those operations can be controlled via the
\code{num_threads} parameter passed through \code{params} to functions like
\link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing
a model from a text file) are done via code paths that don't explicitly accept thread-control
configuration.
Use this function to set the maximum number of threads LightGBM will use for such operations.
This function affects all LightGBM operations in the same process.
So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM
operation in the same process will use more than 4 threads.
Call \code{setLGBMthreads(-1)} to remove this limitation.
}
\seealso{
\link{getLGBMthreads}
}
......@@ -34,6 +34,8 @@ Set one attribute of a \code{lgb.Dataset}
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
......
......@@ -23,6 +23,8 @@ Get a new \code{lgb.Dataset} containing the specified rows of
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
......
......@@ -53,5 +53,6 @@ OBJECTS = \
treelearner/serial_tree_learner.o \
treelearner/tree_learner.o \
treelearner/voting_parallel_tree_learner.o \
utils/openmp_wrapper.o \
c_api.o \
lightgbm_R.o
......@@ -54,5 +54,6 @@ OBJECTS = \
treelearner/serial_tree_learner.o \
treelearner/tree_learner.o \
treelearner/voting_parallel_tree_learner.o \
utils/openmp_wrapper.o \
c_api.o \
lightgbm_R.o
......@@ -1212,6 +1212,23 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
R_API_END();
}
// R entry point: reads LightGBM's process-wide thread cap through the C API
// and stores it in the first element of the integer vector 'out'.
// Always hands R_NilValue back to the caller; the result travels via 'out'.
SEXP LGBM_GetMaxThreads_R(SEXP out) {
  R_API_BEGIN();
  int max_threads = 0;
  CHECK_CALL(LGBM_GetMaxThreads(&max_threads));
  int* out_values = INTEGER(out);
  out_values[0] = max_threads;
  return R_NilValue;
  R_API_END();
}
// R entry point: coerces 'num_threads' to a C int and forwards it to
// LGBM_SetMaxThreads(). Returns R_NilValue.
SEXP LGBM_SetMaxThreads_R(SEXP num_threads) {
  R_API_BEGIN();
  const int requested_max = Rf_asInteger(num_threads);
  CHECK_CALL(LGBM_SetMaxThreads(requested_max));
  return R_NilValue;
  R_API_END();
}
// .Call() calls
static const R_CallMethodDef CallEntries[] = {
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
......@@ -1268,6 +1285,8 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{"LGBM_GetMaxThreads_R" , (DL_FUNC) &LGBM_GetMaxThreads_R , 1},
{"LGBM_SetMaxThreads_R" , (DL_FUNC) &LGBM_SetMaxThreads_R , 1},
{NULL, NULL, 0}
};
......
......@@ -850,4 +850,23 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
*/
LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();
/*!
* \brief Get current maximum number of threads used by LightGBM routines in this process.
* \param[out] out R integer vector whose first element receives the current maximum number
*                 of threads used by LightGBM. -1 means no maximum has been set, in which
*                 case LightGBM defaults to omp_get_max_threads() unless a thread count was
*                 supplied some other way (e.g. num_threads passed through params).
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_GetMaxThreads_R(
SEXP out
);
/*!
* \brief Set maximum number of threads used by LightGBM routines in this process.
* \param num_threads maximum number of threads used by LightGBM. Any value <= 0 is stored
*                    as -1, which means no maximum is enforced and LightGBM defaults to
*                    omp_get_max_threads() unless a thread count was supplied some other
*                    way (e.g. num_threads passed through params).
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_SetMaxThreads_R(
SEXP num_threads
);
#endif // LIGHTGBM_R_H_
......@@ -11,6 +11,11 @@
# the check farm is a shared resource and will typically be running many checks simultaneously.
#
.LGB_MAX_THREADS <- 2L
setLGBMthreads(.LGB_MAX_THREADS)
# control data.table parallelism
# ref: https://github.com/Rdatatable/data.table/issues/5658
data.table::setDTthreads(1L)
# by default, how much should results in tests be allowed to differ from hard-coded expected numbers?
.LGB_NUMERIC_TOLERANCE <- 1e-6
......
test_that("getLGBMthreads() and setLGBMthreads() work as expected", {
    # setLGBMthreads() changes process-wide state. Remember the limit that was in
    # effect before this test (the test helper sets one to stay within CRAN's
    # resource limits) so it can be put back once the assertions are done.
    original_max_threads <- getLGBMthreads()

    # works with integer input
    ret <- setLGBMthreads(2L)
    expect_null(ret)
    expect_equal(getLGBMthreads(), 2L)

    # works with float input
    ret <- setLGBMthreads(1.0)
    expect_null(ret)
    expect_equal(getLGBMthreads(), 1L)

    # setting to any negative number sets max threads to -1
    ret <- setLGBMthreads(-312L)
    expect_null(ret)
    expect_equal(getLGBMthreads(), -1L)

    # restore the previous limit, otherwise every test that runs after this one
    # in the same process would run with no thread limit at all
    setLGBMthreads(original_max_threads)
})
......@@ -27,6 +27,12 @@ Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/),
library(lightgbm)
```
```{r, include=FALSE}
# limit number of threads used, to be respectful of CRAN's resources when it checks this vignette
data.table::setDTthreads(1L)
setLGBMthreads(2L)
```
This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed to a term deposit.
## The dataset
......
......@@ -227,6 +227,7 @@ if ${BUILD_VIGNETTES} ; then
rm -f ./lightgbm/src/network/*.o
rm -f ./lightgbm/src/objective/*.o
rm -f ./lightgbm/src/treelearner/*.o
rm -f ./lightgbm/src/utils/*.o
echo "re-tarring ${TARBALL_NAME}"
tar \
......
......@@ -1561,6 +1561,20 @@ LIGHTGBM_C_EXPORT int LGBM_NetworkInitWithFunctions(int num_machines,
void* reduce_scatter_ext_fun,
void* allgather_ext_fun);
/*!
* \brief Set maximum number of threads used by LightGBM routines in this process.
* \param num_threads maximum number of threads used by LightGBM. Any value <= 0 is stored
*                    as -1, which means no maximum is enforced and LightGBM defaults to
*                    omp_get_max_threads() unless a thread count was supplied some other
*                    way (e.g. num_threads passed through params).
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_SetMaxThreads(int num_threads);
/*!
* \brief Get current maximum number of threads used by LightGBM routines in this process.
* \param[out] out current maximum number of threads used by LightGBM. -1 means no maximum
*                 has been set, in which case LightGBM defaults to omp_get_max_threads()
*                 unless a thread count was supplied some other way (e.g. num_threads
*                 passed through params).
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_GetMaxThreads(int* out);
#if !defined(__cplusplus) && (!defined(__STDC__) || (__STDC_VERSION__ < 199901L))
/*! \brief Inline specifier no-op in C using standards before C99. */
#define INLINE_FUNCTION
......
......@@ -5,6 +5,15 @@
#ifndef LIGHTGBM_OPENMP_WRAPPER_H_
#define LIGHTGBM_OPENMP_WRAPPER_H_
#include <LightGBM/export.h>
// this can only be changed by LGBM_SetMaxThreads()
LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS;
// this is modified by OMP_SET_NUM_THREADS(), for example
// by passing num_thread through params
LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS;
#ifdef _OPENMP
#include <LightGBM/utils/log.h>
......@@ -17,22 +26,25 @@
#include <stdexcept>
#include <vector>
// Returns the size of the thread team that an OpenMP parallel region started
// from here is given.
inline int OMP_NUM_THREADS() {
// starting value, overwritten inside the parallel region below
int ret = 1;
// open a parallel region; only the master thread writes to 'ret', so the
// shared variable is assigned by exactly one thread
#pragma omp parallel
#pragma omp master
{ ret = omp_get_num_threads(); }
return ret;
}
// Applies 'num_threads' through omp_set_num_threads() when it is positive;
// for any other value, re-applies the thread count that OMP_NUM_THREADS()
// reported the first time this function was called.
inline void OMP_SET_NUM_THREADS(int num_threads) {
  // function-local static: evaluated once, on the first call
  static const int initial_thread_count = OMP_NUM_THREADS();
  const int thread_count = (num_threads > 0) ? num_threads : initial_thread_count;
  omp_set_num_threads(thread_count);
}
/*
Get number of threads to use in OpenMP parallel regions.
By default, this will return the result of omp_get_max_threads(),
which is OpenMP-implementation dependent but generally can be controlled
by environment variable OMP_NUM_THREADS.
ref:
- https://www.openmp.org/spec-html/5.0/openmpsu112.html
- https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html
*/
LIGHTGBM_EXTERN_C int OMP_NUM_THREADS();
/*
Update the default number of threads that'll be used in OpenMP parallel
regions for LightGBM routines where the number of threads isn't directly
supplied.
*/
LIGHTGBM_EXTERN_C void OMP_SET_NUM_THREADS(int num_threads);
class ThreadExceptionHelper {
public:
......@@ -102,10 +114,7 @@ class ThreadExceptionHelper {
/** Fall here if no OPENMP support, so just
simulate a single thread running.
All #pragma omp should be ignored by the compiler **/
// requests to change the thread count are accepted and ignored
inline void omp_set_num_threads(int) __GOMP_NOTHROW {} // NOLINT (no cast done here)
inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {}
// thread-count queries always report a single thread
inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;}
inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;}
// the only thread always has id 0
inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;}
inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; }
#ifdef __cplusplus
......
......@@ -2699,6 +2699,23 @@ int LGBM_NetworkInitWithFunctions(int num_machines, int rank,
API_END();
}
// Stores the process-wide cap on LightGBM threads in LGBM_MAX_NUM_THREADS.
// Positive values are stored as given; zero and negative values are all
// stored as -1.
int LGBM_SetMaxThreads(int num_threads) {
  API_BEGIN();
  LGBM_MAX_NUM_THREADS = (num_threads > 0) ? num_threads : -1;
  API_END();
}
// Copies the current value of LGBM_MAX_NUM_THREADS into the int that 'out'
// points to.
int LGBM_GetMaxThreads(int* out) {
  API_BEGIN();
  const int current_max = LGBM_MAX_NUM_THREADS;
  out[0] = current_max;
  API_END();
}
// ---- start of some help functions
......
/*!
* Copyright (c) 2023 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#include <LightGBM/utils/openmp_wrapper.h>
int LGBM_MAX_NUM_THREADS = -1;
int LGBM_DEFAULT_NUM_THREADS = -1;
#ifdef _OPENMP
#include <omp.h>
/*
  Number of threads LightGBM should use in an OpenMP parallel region.

  Resolution order:
    1. LGBM_DEFAULT_NUM_THREADS, when it is positive
    2. otherwise omp_get_max_threads()
  and in both cases the result is capped at LGBM_MAX_NUM_THREADS when that
  cap is positive.
*/
int OMP_NUM_THREADS() {
  int default_num_threads = 1;

  if (LGBM_DEFAULT_NUM_THREADS > 0) {
    // if LightGBM-specific default has been set, ignore OpenMP-global config
    default_num_threads = LGBM_DEFAULT_NUM_THREADS;
  } else {
    // otherwise, default to OpenMP-global config.
    //
    // This is deliberately a plain call with no worksharing construct around it.
    // An orphaned '#pragma omp single' binds to whatever parallel region the
    // caller is in: only one thread of the team would run the assignment (the
    // others would keep the initial value of 1), and the construct's implicit
    // barrier would wait for threads that never call this function.
    default_num_threads = omp_get_max_threads();
  }

  // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't
  // use more than that many threads
  if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) {
    return LGBM_MAX_NUM_THREADS;
  }

  return default_num_threads;
}
// Records the LightGBM-specific default thread count in
// LGBM_DEFAULT_NUM_THREADS. Positive values are stored as given; zero and
// negative values are all stored as -1.
void OMP_SET_NUM_THREADS(int num_threads) {
  LGBM_DEFAULT_NUM_THREADS = (num_threads > 0) ? num_threads : -1;
}
#endif // _OPENMP
......@@ -247,3 +247,36 @@ def test_booster():
c_str(''),
c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2)
def test_max_thread_control():
    """Round-trip LightGBM's process-wide thread cap through the C API."""

    def read_max_threads():
        # query the library and make sure the call itself reported success
        current = ctypes.c_int(0)
        status = LIB.LGBM_GetMaxThreads(ctypes.byref(current))
        assert status == 0
        return current.value

    def write_max_threads(value):
        # push a new cap into the library and make sure the call reported success
        status = LIB.LGBM_SetMaxThreads(ctypes.c_int(value))
        assert status == 0

    # at initialization, should be -1
    assert read_max_threads() == -1

    # updating that value through the C API should work
    write_max_threads(6)
    assert read_max_threads() == 6

    # resetting to any negative number should set it to -1
    write_max_threads(-123)
    assert read_max_threads() == -1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment