Unverified commit c47aafa3, authored by Nick Hill, committed by GitHub
Browse files

[BugFix] Lazily import XgrammarBackend to avoid early cuda init (#15171)


Signed-off-by: Nick Hill <nhill@redhat.com>
parent cfbca8a2
@@ -9,7 +9,6 @@ from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
                                                      StructuredOutputGrammar)
-from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
 
 if TYPE_CHECKING:
     import numpy as np
@@ -47,6 +46,9 @@ class StructuredOutputManager:
         if self.backend is None:
             backend_name = request.sampling_params.guided_decoding.backend_name
             if backend_name == "xgrammar":
+                from vllm.v1.structured_output.backend_xgrammar import (
+                    XgrammarBackend)
+
                 self.backend = XgrammarBackend(self.vllm_config)
             else:
                 raise ValueError(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment