Unverified Commit 536280fc authored by Kris Hung's avatar Kris Hung Committed by GitHub
Browse files

test: Add deepep test for vllm (#2534)

parent 9a021885
......@@ -10,6 +10,7 @@ NODE_RANK=""
GPUS_PER_NODE=""
MASTER_ADDR="localhost"
LOG_DIR="./logs"
MODEL="deepseek-ai/DeepSeek-R1"
# Parse command line arguments
while [[ $# -gt 0 ]]; do
......@@ -34,6 +35,10 @@ while [[ $# -gt 0 ]]; do
LOG_DIR="$2"
shift 2
;;
--model)
MODEL="$2"
shift 2
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Options:"
......@@ -42,6 +47,7 @@ while [[ $# -gt 0 ]]; do
echo " --gpus-per-node L Number of GPUs per node (required, int)"
echo " --master-addr ADDR Master node address (default: localhost)"
echo " --log-dir DIR Directory for log files (default: ./logs)"
echo " --model MODEL Model name to use (default: deepseek-ai/DeepSeek-R1)"
echo " -h, --help Show this help message"
exit 0
;;
......@@ -71,6 +77,7 @@ echo " GPUs per node: $GPUS_PER_NODE"
echo " Data parallel size: $DATA_PARALLEL_SIZE"
echo " Master address: $MASTER_ADDR"
echo " Log directory: $LOG_DIR"
echo " Model name: $MODEL"
trap 'echo Cleaning up...; kill 0' EXIT
......@@ -90,7 +97,7 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do
VLLM_USE_DEEP_GEMM=1 \
VLLM_RANDOMIZE_DP_DUMMY_INPUTS=1 \
python3 -m dynamo.vllm \
--model deepseek-ai/DeepSeek-R1 \
--model $MODEL \
--data_parallel_size $DATA_PARALLEL_SIZE \
--data-parallel-rank $dp_rank \
--enable-expert-parallel \
......
......@@ -175,6 +175,7 @@ markers = [
"trtllm_marker: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang",
"slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
"kvbm: marks tests for KV behavior and model determinism"
]
......
......@@ -260,6 +260,30 @@ vllm_configs = {
model="Qwen/Qwen3-0.6B",
delayed_start=45,
),
# "deepep" test case: runs the dsr1_dep.sh script from the vLLM backend
# directory and checks both the chat-completions and completions endpoints.
"deepep": VLLMConfig(
name="deepep",
directory="/workspace/components/backends/vllm",
script_name="dsr1_dep.sh",
# The h100 marker is registered in the pytest markers list as
# "marks tests to run on H100"; gpu_2 presumably means two GPUs are required
# (consistent with --gpus-per-node 2 below) -- TODO confirm.
marks=[pytest.mark.gpu_2, pytest.mark.vllm, pytest.mark.h100],
endpoints=["v1/chat/completions", "v1/completions"],
# NOTE(review): handlers appear to be paired with `endpoints` by position
# (chat handler first, completions handler second) -- confirm in VLLMConfig.
response_handlers=[
chat_completions_response_handler,
completions_response_handler,
],
# Same model name that is passed to the script through --model in `args`.
model="deepseek-ai/DeepSeek-V2-Lite",
# NOTE(review): delayed_start and timeout are presumably in seconds -- confirm.
delayed_start=45,
# Flag/value pairs; presumably forwarded to the script's command line
# (verify against how VLLMConfig consumes `args`). They select the
# DeepSeek-V2-Lite model and a single-node, rank-0, two-GPU layout.
args=[
"--model",
"deepseek-ai/DeepSeek-V2-Lite",
"--num-nodes",
"1",
"--node-rank",
"0",
"--gpus-per-node",
"2",
],
timeout=300,
),
"multimodal_agg": VLLMConfig(
name="multimodal_agg",
directory="/workspace/examples/multimodal",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment