Unverified Commit 3c1f5a92 authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Fix duplicated imports in hf_transformers_utils.py (#1141)

parent 57d0bd91
...@@ -222,7 +222,7 @@ async def async_request_openai_completions( ...@@ -222,7 +222,7 @@ async def async_request_openai_completions(
return output return output
async def async_request_ginfer( async def async_request_gserver(
request_func_input: RequestFuncInput, request_func_input: RequestFuncInput,
pbar: Optional[tqdm] = None, pbar: Optional[tqdm] = None,
) -> RequestFuncOutput: ) -> RequestFuncOutput:
...@@ -268,7 +268,7 @@ ASYNC_REQUEST_FUNCS = { ...@@ -268,7 +268,7 @@ ASYNC_REQUEST_FUNCS = {
"vllm": async_request_openai_completions, "vllm": async_request_openai_completions,
"lmdeploy": async_request_openai_completions, "lmdeploy": async_request_openai_completions,
"trt": async_request_trt_llm, "trt": async_request_trt_llm,
"ginfer": async_request_ginfer, "gserver": async_request_gserver,
} }
...@@ -790,7 +790,7 @@ def run_benchmark(args_: argparse.Namespace): ...@@ -790,7 +790,7 @@ def run_benchmark(args_: argparse.Namespace):
"lmdeploy": 23333, "lmdeploy": 23333,
"vllm": 8000, "vllm": 8000,
"trt": 8000, "trt": 8000,
"ginfer": 9988, "gserver": 9988,
}.get(args.backend, 30000) }.get(args.backend, 30000)
api_url = ( api_url = (
...@@ -813,7 +813,7 @@ def run_benchmark(args_: argparse.Namespace): ...@@ -813,7 +813,7 @@ def run_benchmark(args_: argparse.Namespace):
if args.model is None: if args.model is None:
print("Please provide a model using `--model` when using `trt` backend.") print("Please provide a model using `--model` when using `trt` backend.")
sys.exit(1) sys.exit(1)
elif args.backend == "ginfer": elif args.backend == "gserver":
api_url = args.base_url if args.base_url else f"{args.host}:{args.port}" api_url = args.base_url if args.base_url else f"{args.host}:{args.port}"
args.model = args.model or "default" args.model = args.model or "default"
......
...@@ -44,11 +44,6 @@ except ImportError: ...@@ -44,11 +44,6 @@ except ImportError:
from sglang.srt.utils import is_multimodal_model from sglang.srt.utils import is_multimodal_model
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
ChatGLMConfig.model_type: ChatGLMConfig,
DbrxConfig.model_type: DbrxConfig,
}
def download_from_hf(model_path: str): def download_from_hf(model_path: str):
if os.path.exists(model_path): if os.path.exists(model_path):
......
...@@ -112,7 +112,7 @@ def call_generate_srt_raw(prompt, temperature, max_tokens, stop=None, url=None): ...@@ -112,7 +112,7 @@ def call_generate_srt_raw(prompt, temperature, max_tokens, stop=None, url=None):
return pred return pred
def call_generate_ginfer(prompt, temperature, max_tokens, stop=None, url=None): def call_generate_gserver(prompt, temperature, max_tokens, stop=None, url=None):
raise NotImplementedError() raise NotImplementedError()
...@@ -256,7 +256,7 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser): ...@@ -256,7 +256,7 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
"vllm", "vllm",
"outlines", "outlines",
"lightllm", "lightllm",
"ginfer", "gserver",
"guidance", "guidance",
"lmql", "lmql",
"srt-raw", "srt-raw",
...@@ -277,7 +277,7 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser): ...@@ -277,7 +277,7 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
"lightllm": 22000, "lightllm": 22000,
"lmql": 23000, "lmql": 23000,
"srt-raw": 30000, "srt-raw": 30000,
"ginfer": 9988, "gserver": 9988,
} }
args.port = default_port.get(args.backend, None) args.port = default_port.get(args.backend, None)
return args return args
...@@ -313,8 +313,8 @@ def _get_call_generate(args: argparse.Namespace): ...@@ -313,8 +313,8 @@ def _get_call_generate(args: argparse.Namespace):
return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate") return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate")
elif args.backend == "srt-raw": elif args.backend == "srt-raw":
return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate") return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate")
elif args.backend == "ginfer": elif args.backend == "gserver":
return partial(call_generate_ginfer, url=f"{args.host}:{args.port}") return partial(call_generate_gserver, url=f"{args.host}:{args.port}")
elif args.backend == "outlines": elif args.backend == "outlines":
return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate") return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate")
elif args.backend == "guidance": elif args.backend == "guidance":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment