Commit 536280fc (unverified), authored by Kris Hung, committed by GitHub
Browse files

test: Add deepep test for vllm (#2534)

parent 9a021885
...@@ -10,6 +10,7 @@ NODE_RANK="" ...@@ -10,6 +10,7 @@ NODE_RANK=""
GPUS_PER_NODE="" GPUS_PER_NODE=""
MASTER_ADDR="localhost" MASTER_ADDR="localhost"
LOG_DIR="./logs" LOG_DIR="./logs"
MODEL="deepseek-ai/DeepSeek-R1"
# Parse command line arguments # Parse command line arguments
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
...@@ -34,6 +35,10 @@ while [[ $# -gt 0 ]]; do ...@@ -34,6 +35,10 @@ while [[ $# -gt 0 ]]; do
LOG_DIR="$2" LOG_DIR="$2"
shift 2 shift 2
;; ;;
--model)
MODEL="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Usage: $0 [OPTIONS]" echo "Usage: $0 [OPTIONS]"
echo "Options:" echo "Options:"
...@@ -42,6 +47,7 @@ while [[ $# -gt 0 ]]; do ...@@ -42,6 +47,7 @@ while [[ $# -gt 0 ]]; do
echo " --gpus-per-node L Number of GPUs per node (required, int)" echo " --gpus-per-node L Number of GPUs per node (required, int)"
echo " --master-addr ADDR Master node address (default: localhost)" echo " --master-addr ADDR Master node address (default: localhost)"
echo " --log-dir DIR Directory for log files (default: ./logs)" echo " --log-dir DIR Directory for log files (default: ./logs)"
echo " --model MODEL Model name to use (default: deepseek-ai/DeepSeek-R1)"
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
exit 0 exit 0
;; ;;
...@@ -71,6 +77,7 @@ echo " GPUs per node: $GPUS_PER_NODE" ...@@ -71,6 +77,7 @@ echo " GPUs per node: $GPUS_PER_NODE"
echo " Data parallel size: $DATA_PARALLEL_SIZE" echo " Data parallel size: $DATA_PARALLEL_SIZE"
echo " Master address: $MASTER_ADDR" echo " Master address: $MASTER_ADDR"
echo " Log directory: $LOG_DIR" echo " Log directory: $LOG_DIR"
echo " Model name: $MODEL"
trap 'echo Cleaning up...; kill 0' EXIT trap 'echo Cleaning up...; kill 0' EXIT
...@@ -90,7 +97,7 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do ...@@ -90,7 +97,7 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do
VLLM_USE_DEEP_GEMM=1 \ VLLM_USE_DEEP_GEMM=1 \
VLLM_RANDOMIZE_DP_DUMMY_INPUTS=1 \ VLLM_RANDOMIZE_DP_DUMMY_INPUTS=1 \
python3 -m dynamo.vllm \ python3 -m dynamo.vllm \
--model deepseek-ai/DeepSeek-R1 \ --model $MODEL \
--data_parallel_size $DATA_PARALLEL_SIZE \ --data_parallel_size $DATA_PARALLEL_SIZE \
--data-parallel-rank $dp_rank \ --data-parallel-rank $dp_rank \
--enable-expert-parallel \ --enable-expert-parallel \
......
...@@ -175,6 +175,7 @@ markers = [ ...@@ -175,6 +175,7 @@ markers = [
"trtllm_marker: marks tests as requiring trtllm", "trtllm_marker: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang", "sglang: marks tests as requiring sglang",
"slow: marks tests as known to be slow", "slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
"kvbm: marks tests for KV behavior and model determinism" "kvbm: marks tests for KV behavior and model determinism"
] ]
......
...@@ -260,6 +260,30 @@ vllm_configs = { ...@@ -260,6 +260,30 @@ vllm_configs = {
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
delayed_start=45, delayed_start=45,
), ),
"deepep": VLLMConfig(
name="deepep",
directory="/workspace/components/backends/vllm",
script_name="dsr1_dep.sh",
marks=[pytest.mark.gpu_2, pytest.mark.vllm, pytest.mark.h100],
endpoints=["v1/chat/completions", "v1/completions"],
response_handlers=[
chat_completions_response_handler,
completions_response_handler,
],
model="deepseek-ai/DeepSeek-V2-Lite",
delayed_start=45,
args=[
"--model",
"deepseek-ai/DeepSeek-V2-Lite",
"--num-nodes",
"1",
"--node-rank",
"0",
"--gpus-per-node",
"2",
],
timeout=300,
),
"multimodal_agg": VLLMConfig( "multimodal_agg": VLLMConfig(
name="multimodal_agg", name="multimodal_agg",
directory="/workspace/examples/multimodal", directory="/workspace/examples/multimodal",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment