Unverified commit ad6eca40, authored by leiwen83, committed by GitHub
Browse files

Fix early CUDA init via get_architecture_class_name import (#3770)


Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
parent 205b9494
......@@ -13,7 +13,6 @@ from vllm.engine.ray_utils import initialize_ray_cluster
from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor.model_loader import get_architecture_class_name
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
from vllm.sequence import (MultiModalData, SamplerOutput, Sequence,
......@@ -115,6 +114,8 @@ class LLMEngine:
# If usage stat is enabled, collect relevant info.
if is_usage_stats_enabled():
from vllm.model_executor.model_loader import (
get_architecture_class_name)
usage_message.report_usage(
get_architecture_class_name(model_config),
usage_context,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment