"docs/vscode:/vscode.git/clone" did not exist on "617d55c04e72be51d8b4cc4a90d8b136db27da3b"
Unverified commit fae35432, authored by Alec and committed by GitHub
Browse files

ci: longer timeout, change model for l40 (#2951)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
parent 6104c93f
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Setup cleanup trap
# PID of the background frontend; stays empty until the frontend is launched so
# that cleanup is a no-op if the script is stopped before that point.
DYNAMO_PID=""

# Stop the background frontend (if it was started) and reap it.
cleanup() {
    echo "Cleaning up background processes..."
    if [[ -n "$DYNAMO_PID" ]]; then
        kill "$DYNAMO_PID" 2>/dev/null || true
        wait "$DYNAMO_PID" 2>/dev/null || true
    fi
    echo "Cleanup complete."
}

# Run cleanup exactly once, from the EXIT trap. INT/TERM are converted into a
# regular exit: a handler that merely returns would let the script carry on
# with the next command (and then run cleanup a second time on EXIT).
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# run clear_namespace
python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo

# run ingress
python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$!

# run worker
python3 -m dynamo.sglang \
    --model-path "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" \
    --served-model-name "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" \
    --page-size 16 \
    --tp 1 \
    --trust-remote-code \
    --skip-tokenizer-init
...@@ -26,10 +26,10 @@ sglang_dir = os.environ.get("SGLANG_DIR", "/workspace/components/backends/sglang ...@@ -26,10 +26,10 @@ sglang_dir = os.environ.get("SGLANG_DIR", "/workspace/components/backends/sglang
sglang_configs = { sglang_configs = {
"aggregated": SGLangConfig( "aggregated": SGLangConfig(
name="aggregated", name="aggregated",
directory=sglang_dir, directory="/workspace/tests/serve",
script_name="agg.sh", script_name="sglang_agg.sh",
marks=[pytest.mark.gpu_1], marks=[pytest.mark.gpu_1],
model="Qwen/Qwen3-0.6B", model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
env={}, env={},
models_port=8000, models_port=8000,
request_payloads=[chat_payload_default(), completion_payload_default()], request_payloads=[chat_payload_default(), completion_payload_default()],
......
...@@ -35,11 +35,12 @@ class EngineConfig: ...@@ -35,11 +35,12 @@ class EngineConfig:
name: str name: str
directory: str directory: str
script_name: str
marks: List[Any] marks: List[Any]
request_payloads: List[BasePayload] request_payloads: List[BasePayload]
model: str model: str
script_name: Optional[str] = None
command: Optional[List[str]] = None
script_args: Optional[List[str]] = None script_args: Optional[List[str]] = None
models_port: int = 8000 models_port: int = 8000
timeout: int = 600 timeout: int = 600
...@@ -47,6 +48,13 @@ class EngineConfig: ...@@ -47,6 +48,13 @@ class EngineConfig:
env: Dict[str, str] = field(default_factory=dict) env: Dict[str, str] = field(default_factory=dict)
stragglers: list[str] = field(default_factory=list) stragglers: list[str] = field(default_factory=list)
def __post_init__(self):
    """Ensure exactly one launch mechanism is configured.

    Raises:
        ValueError: If neither ``script_name`` nor ``command`` is set, or if
            both are set at the same time.
    """
    has_script = bool(self.script_name)
    has_command = bool(self.command)

    if not (has_script or has_command):
        raise ValueError("Either script_name or command must be provided")
    if has_script and has_command:
        raise ValueError("Cannot provide both script_name and command")
class EngineProcess(ManagedProcess): class EngineProcess(ManagedProcess):
"""Base class for LLM engine processes (vLLM, TRT-LLM, etc.)""" """Base class for LLM engine processes (vLLM, TRT-LLM, etc.)"""
...@@ -132,24 +140,21 @@ class EngineProcess(ManagedProcess): ...@@ -132,24 +140,21 @@ class EngineProcess(ManagedProcess):
logger.info(f"SUCCESS: All expected log patterns: {patterns} found") logger.info(f"SUCCESS: All expected log patterns: {patterns} found")
@classmethod @classmethod
def from_script( def from_config(
cls, cls,
config: EngineConfig, config: EngineConfig,
request: Any, request: Any,
extra_env: Optional[Dict[str, str]] = None, extra_env: Optional[Dict[str, str]] = None,
) -> "EngineProcess": ) -> "EngineProcess":
"""Factory to create an EngineProcess configured to run a launch script.""" """Factory to create an EngineProcess from configuration (script or command)."""
assert isinstance(config, EngineConfig), "Must use an instance of EngineConfig" assert isinstance(config, EngineConfig), "Must use an instance of EngineConfig"
directory = config.directory if config.script_name:
script_path = os.path.join(directory, "launch", config.script_name) command = cls._build_script_command(config)
elif config.command:
if not os.path.exists(script_path): command = config.command.copy()
raise FileNotFoundError(f"Script not found: {script_path}") else:
raise ValueError("Either script_name or command must be provided in config")
command: List[str] = ["bash", script_path]
if config.script_args:
command.extend(config.script_args)
env = os.environ.copy() env = os.environ.copy()
if getattr(config, "env", None): if getattr(config, "env", None):
...@@ -162,7 +167,7 @@ class EngineProcess(ManagedProcess): ...@@ -162,7 +167,7 @@ class EngineProcess(ManagedProcess):
env=env, env=env,
timeout=config.timeout, timeout=config.timeout,
display_output=True, display_output=True,
working_dir=directory, working_dir=config.directory,
health_check_ports=[], health_check_ports=[],
health_check_urls=[ health_check_urls=[
(f"http://localhost:{config.models_port}/v1/models", check_models_api), (f"http://localhost:{config.models_port}/v1/models", check_models_api),
...@@ -176,3 +181,47 @@ class EngineProcess(ManagedProcess): ...@@ -176,3 +181,47 @@ class EngineProcess(ManagedProcess):
stragglers=config.stragglers, stragglers=config.stragglers,
log_dir=request.node.name, log_dir=request.node.name,
) )
@classmethod
def _build_script_command(cls, config: EngineConfig) -> List[str]:
    """Build the ``bash`` command line for a script-based configuration.

    The script is looked up at
    ``<config.directory>/launch/<config.script_name>`` and any
    ``config.script_args`` are appended after the script path.

    Args:
        config: Engine configuration; ``script_name`` must be set.

    Returns:
        The argument list ``["bash", script_path, *script_args]``.

    Raises:
        ValueError: If ``config.script_name`` is empty or ``None``.
        FileNotFoundError: If the resolved script path does not exist.
    """
    # Raise explicitly rather than assert: assertions are removed under
    # ``python -O`` and a missing name would then fail inside os.path.join().
    if not config.script_name:
        raise ValueError("Must provide script_name to run fn _build_script_command")

    script_path = os.path.join(config.directory, "launch", config.script_name)
    if not os.path.exists(script_path):
        raise FileNotFoundError(f"Script not found: {script_path}")

    return ["bash", script_path, *(config.script_args or [])]
@classmethod
def from_script(
    cls,
    config: EngineConfig,
    request: Any,
    extra_env: Optional[Dict[str, str]] = None,
) -> "EngineProcess":
    """Create an EngineProcess for a launch-script configuration.

    Deprecated: kept as a compatibility alias; call ``from_config()``
    directly instead. All arguments are forwarded unchanged.
    """
    factory = cls.from_config
    return factory(config, request, extra_env)
@classmethod
def from_command(
    cls,
    config: EngineConfig,
    request: Any,
    extra_env: Optional[Dict[str, str]] = None,
) -> "EngineProcess":
    """Create an EngineProcess for a direct-command configuration.

    Deprecated: kept as a compatibility alias; call ``from_config()``
    directly instead. All arguments are forwarded unchanged.
    """
    factory = cls.from_config
    return factory(config, request, extra_env)
...@@ -30,7 +30,7 @@ class BasePayload: ...@@ -30,7 +30,7 @@ class BasePayload:
expected_response: List[str] expected_response: List[str]
expected_log: List[str] expected_log: List[str]
repeat_count: int = 1 repeat_count: int = 1
timeout: int = 30 timeout: int = 60
# Connection info # Connection info
host: str = "localhost" host: str = "localhost"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment