"vscode:/vscode.git/clone" did not exist on "46d4359450cd194ab2a4f2fdc370ff4b33a188e2"
Unverified commit 5bf35a91, authored by Cyrus Leung and committed by GitHub
Browse files

[Doc][CI/Build] Update docs and tests to use `vllm serve` (#6431)

parent a19e8d37
...@@ -23,17 +23,17 @@ TEST_IMAGE_URLS = [ ...@@ -23,17 +23,17 @@ TEST_IMAGE_URLS = [
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def server(): def server():
with RemoteOpenAIServer([ args = [
"--model", "--dtype",
MODEL_NAME, "bfloat16",
"--dtype", "--max-model-len",
"bfloat16", "4096",
"--max-model-len", "--enforce-eager",
"4096", "--chat-template",
"--enforce-eager", str(LLAVA_CHAT_TEMPLATE),
"--chat-template", ]
str(LLAVA_CHAT_TEMPLATE),
]) as remote_server: with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server yield remote_server
......
...@@ -214,12 +214,12 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path): ...@@ -214,12 +214,12 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
## Start OpenAI API server ## Start OpenAI API server
openai_args = [ openai_args = [
"--model", model_ref, "--dtype", "float16", "--load-format", "--dtype", "float16", "--load-format",
"tensorizer", "--model-loader-extra-config", "tensorizer", "--model-loader-extra-config",
json.dumps(model_loader_extra_config), json.dumps(model_loader_extra_config),
] ]
with RemoteOpenAIServer(openai_args) as server: with RemoteOpenAIServer(model_ref, openai_args) as server:
print("Server ready.") print("Server ready.")
client = server.get_client() client = server.get_client()
......
...@@ -49,7 +49,13 @@ class RemoteOpenAIServer: ...@@ -49,7 +49,13 @@ class RemoteOpenAIServer:
DUMMY_API_KEY = "token-abc123" # vLLM's OpenAI server does not need API key DUMMY_API_KEY = "token-abc123" # vLLM's OpenAI server does not need API key
MAX_SERVER_START_WAIT_S = 600 # wait up to 600 seconds for the server to start MAX_SERVER_START_WAIT_S = 600 # wait up to 600 seconds for the server to start
def __init__(self, cli_args: List[str], *, auto_port: bool = True) -> None: def __init__(
self,
model: str,
cli_args: List[str],
*,
auto_port: bool = True,
) -> None:
if auto_port: if auto_port:
if "-p" in cli_args or "--port" in cli_args: if "-p" in cli_args or "--port" in cli_args:
raise ValueError("You have manually specified the port" raise ValueError("You have manually specified the port"
...@@ -68,12 +74,10 @@ class RemoteOpenAIServer: ...@@ -68,12 +74,10 @@ class RemoteOpenAIServer:
# the current process might initialize cuda, # the current process might initialize cuda,
# to be safe, we should use spawn method # to be safe, we should use spawn method
env['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn' env['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
self.proc = subprocess.Popen( self.proc = subprocess.Popen(["vllm", "serve"] + [model] + cli_args,
[sys.executable, "-m", "vllm.entrypoints.openai.api_server"] + env=env,
cli_args, stdout=sys.stdout,
env=env, stderr=sys.stderr)
stdout=sys.stdout,
stderr=sys.stderr)
self._wait_for_server(url=self.url_for("health"), self._wait_for_server(url=self.url_for("health"),
timeout=self.MAX_SERVER_START_WAIT_S) timeout=self.MAX_SERVER_START_WAIT_S)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment