Import turbomind in gradio server only when it is needed (#303)

59f8e674 · AllentDan · GitHub · 4279d8ca · 59f8e674
Unverified commit 59f8e674, authored Aug 25, 2023 by AllentDan; committed by GitHub on Aug 25, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

lmdeploy/serve/async_engine.py +1 -1

No files found.
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -6,7 +6,6 @@ import random
 from contextlib import contextmanager
 from typing import Literal, Optional
-from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS, BaseModel
@@ -30,6 +29,7 @@ class AsyncEngine:
    """
    def __init__(self, model_path, instance_num=32, tp=1) -> None:
+        from lmdeploy import turbomind as tm
        from lmdeploy.turbomind.tokenizer import Tokenizer
        tokenizer_model_path = osp.join(model_path, 'triton_models',
                                        'tokenizer')