Unverified commit 59f8e674, authored by AllentDan and committed by GitHub
Browse files

Import turbomind in gradio server only when it is needed (#303)

parent 4279d8ca
...@@ -6,7 +6,6 @@ import random ...@@ -6,7 +6,6 @@ import random
from contextlib import contextmanager from contextlib import contextmanager
from typing import Literal, Optional from typing import Literal, Optional
from lmdeploy import turbomind as tm
from lmdeploy.model import MODELS, BaseModel from lmdeploy.model import MODELS, BaseModel
...@@ -30,6 +29,7 @@ class AsyncEngine: ...@@ -30,6 +29,7 @@ class AsyncEngine:
""" """
def __init__(self, model_path, instance_num=32, tp=1) -> None: def __init__(self, model_path, instance_num=32, tp=1) -> None:
from lmdeploy import turbomind as tm
from lmdeploy.turbomind.tokenizer import Tokenizer from lmdeploy.turbomind.tokenizer import Tokenizer
tokenizer_model_path = osp.join(model_path, 'triton_models', tokenizer_model_path = osp.join(model_path, 'triton_models',
'tokenizer') 'tokenizer')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment