Unverified Commit 1cab43c2 authored by youkaichao's avatar youkaichao Committed by GitHub
Browse files

[misc] instruct pytorch to use nvml-based cuda check (#15951)


Signed-off-by: youkaichao <youkaichao@gmail.com>
parent 8bd651b3
......@@ -4,9 +4,10 @@
# version library first. Such assumption is critical for some customization.
from .version import __version__, __version_tuple__ # isort:skip
import os
import torch
# Import the environment-variable overrides before any other module so
# that the variables are already set by the time those modules are
# imported.
import vllm.env_override # isort:skip # noqa: F401
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
......@@ -23,19 +24,6 @@ from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams
# Set common config/environment variables that must be in place for every
# process created by vllm and for every process that interacts with vllm
# workers. These statements run as a side effect whenever `import vllm`
# is executed.
# Force NCCL_CUMEM_ENABLE to '0' for this process (and any child that
# inherits its environment).
# see https://github.com/NVIDIA/nccl/issues/1234
os.environ['NCCL_CUMEM_ENABLE'] = '0'
# Request a single inductor compile thread through the environment.
# see https://github.com/vllm-project/vllm/issues/10480
os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
# Also set the value directly on the inductor config object, in addition
# to the environment variable above.
# see https://github.com/vllm-project/vllm/issues/10619
torch._inductor.config.compile_threads = 1
__all__ = [
"__version__",
"__version_tuple__",
......
# SPDX-License-Identifier: Apache-2.0
import os
import torch
# set some common config/environment variables that should be set
# for all processes created by vllm and all processes
# that interact with vllm workers.
# they are executed whenever `import vllm` is called.
# see https://github.com/NVIDIA/nccl/issues/1234
os.environ['NCCL_CUMEM_ENABLE'] = '0'
# see https://github.com/vllm-project/vllm/pull/15951
# it avoids unintentional cuda initialization from torch.cuda.is_available()
os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
# see https://github.com/vllm-project/vllm/issues/10480
os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
# see https://github.com/vllm-project/vllm/issues/10619
torch._inductor.config.compile_threads = 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment