Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2ba60ec7
Unverified
Commit
2ba60ec7
authored
Oct 17, 2025
by
Nicolò Lucchesi
Committed by
GitHub
Oct 17, 2025
Browse files
[CI] Nixl integration tests (#27010)
Signed-off-by:
NickLucche
<
nlucches@redhat.com
>
parent
bd7157a0
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
72 additions
and
9 deletions
+72
-9
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+11
-0
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+13
-7
tests/v1/kv_connector/nixl_integration/test_accuracy.py
tests/v1/kv_connector/nixl_integration/test_accuracy.py
+6
-1
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
+2
-1
tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
...nnector/nixl_integration/tp_config_sweep_accuracy_test.sh
+40
-0
No files found.
.buildkite/test-pipeline.yaml
View file @
2ba60ec7
...
@@ -1085,6 +1085,17 @@ steps:
...
@@ -1085,6 +1085,17 @@ steps:
commands
:
commands
:
-
bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
-
bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
-
label
:
NixlConnector P/D accuracy tests (Distributed)
# 30min
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_gpus
:
4
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
##### multi gpus test #####
##### multi gpus test #####
##### A100 test #####
##### A100 test #####
...
...
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
View file @
2ba60ec7
...
@@ -34,15 +34,21 @@ else
...
@@ -34,15 +34,21 @@ else
fi
fi
# Models to run
# Models to run
MODELS
=(
MODEL_NAMES
=
${
MODEL_NAMES
:-}
if
[[
-n
"
$MODEL_NAMES
"
]]
;
then
MODELS
=(
"
$MODEL_NAMES
"
)
else
MODELS
=(
"Qwen/Qwen3-0.6B"
"Qwen/Qwen3-0.6B"
)
)
fi
# Number of prefill and decode instances to create
# Number of prefill and decode instances to create
NUM_PREFILL_INSTANCES
=
${
NUM_PREFILL_INSTANCES
:-
1
}
# Default to 1
NUM_PREFILL_INSTANCES
=
${
NUM_PREFILL_INSTANCES
:-
1
}
# Default to 1
NUM_DECODE_INSTANCES
=
${
NUM_DECODE_INSTANCES
:-
1
}
# Default to 1
NUM_DECODE_INSTANCES
=
${
NUM_DECODE_INSTANCES
:-
1
}
# Default to 1
PREFILLER_TP_SIZE
=
${
PREFILLER_TP_SIZE
:-
1
}
PREFILLER_TP_SIZE
=
${
PREFILLER_TP_SIZE
:-
1
}
DECODER_TP_SIZE
=
${
DECODER_TP_SIZE
:-
1
}
DECODER_TP_SIZE
=
${
DECODER_TP_SIZE
:-
1
}
GPU_MEMORY_UTILIZATION
=
${
GPU_MEMORY_UTILIZATION
:-
0
.2
}
# Find the git repository root directory
# Find the git repository root directory
GIT_ROOT
=
$(
git rev-parse
--show-toplevel
)
GIT_ROOT
=
$(
git rev-parse
--show-toplevel
)
...
@@ -130,7 +136,7 @@ run_tests_for_model() {
...
@@ -130,7 +136,7 @@ run_tests_for_model() {
vllm serve
$model_name
\
vllm serve
$model_name
\
--port
$PORT
\
--port
$PORT
\
--enforce-eager
\
--enforce-eager
\
--gpu-memory-utilization
0.2
\
--gpu-memory-utilization
$GPU_MEMORY_UTILIZATION
\
--tensor-parallel-size
$PREFILLER_TP_SIZE
\
--tensor-parallel-size
$PREFILLER_TP_SIZE
\
--kv-transfer-config '
$KV_CONFIG
'"
--kv-transfer-config '
$KV_CONFIG
'"
...
@@ -171,7 +177,7 @@ run_tests_for_model() {
...
@@ -171,7 +177,7 @@ run_tests_for_model() {
vllm serve
$model_name
\
vllm serve
$model_name
\
--port
$PORT
\
--port
$PORT
\
--enforce-eager
\
--enforce-eager
\
--gpu-memory-utilization
0.2
\
--gpu-memory-utilization
$GPU_MEMORY_UTILIZATION
\
--tensor-parallel-size
$DECODER_TP_SIZE
\
--tensor-parallel-size
$DECODER_TP_SIZE
\
--kv-transfer-config '
$KV_CONFIG
'"
--kv-transfer-config '
$KV_CONFIG
'"
...
@@ -200,7 +206,7 @@ run_tests_for_model() {
...
@@ -200,7 +206,7 @@ run_tests_for_model() {
done
done
# Build the command for the proxy server with all the hosts and ports
# Build the command for the proxy server with all the hosts and ports
PROXY_CMD
=
"python
${
GIT_ROOT
}
/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192"
PROXY_CMD
=
"python
3
${
GIT_ROOT
}
/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192"
# Add all prefill hosts and ports
# Add all prefill hosts and ports
PROXY_CMD+
=
" --prefiller-hosts
${
PREFILL_HOSTS
[@]
}
"
PROXY_CMD+
=
" --prefiller-hosts
${
PREFILL_HOSTS
[@]
}
"
...
@@ -219,7 +225,7 @@ run_tests_for_model() {
...
@@ -219,7 +225,7 @@ run_tests_for_model() {
# Run lm eval for this model
# Run lm eval for this model
echo
"Running tests for
$model_name
"
echo
"Running tests for
$model_name
"
TEST_MODEL
=
$model_name
python
-m
pytest
-s
-x
${
GIT_ROOT
}
/tests/v1/kv_connector/nixl_integration/test_accuracy.py
TEST_MODEL
=
$model_name
python
3
-m
pytest
-s
-x
${
GIT_ROOT
}
/tests/v1/kv_connector/nixl_integration/test_accuracy.py
# Clean up before running next model
# Clean up before running next model
cleanup_instances
cleanup_instances
...
...
tests/v1/kv_connector/nixl_integration/test_accuracy.py
View file @
2ba60ec7
...
@@ -12,7 +12,12 @@ FILTER = "exact_match,strict-match"
...
@@ -12,7 +12,12 @@ FILTER = "exact_match,strict-match"
RTOL
=
0.03
RTOL
=
0.03
# Model-specific expected values
# Model-specific expected values
EXPECTED_VALUES
=
{
"Qwen/Qwen3-0.6B"
:
0.41
,
"deepseek-ai/deepseek-vl2-small"
:
0.59
}
EXPECTED_VALUES
=
{
"Qwen/Qwen3-0.6B"
:
0.41
,
"deepseek-ai/deepseek-vl2-small"
:
0.59
,
"deepseek-ai/deepseek-vl2-tiny"
:
0.19
,
"deepseek-ai/DeepSeek-V2-Lite-Chat"
:
0.65
,
}
SIMPLE_PROMPT
=
(
SIMPLE_PROMPT
=
(
"The best part about working on vLLM is that I got to meet so many people across "
"The best part about working on vLLM is that I got to meet so many people across "
...
...
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
View file @
2ba60ec7
...
@@ -76,7 +76,8 @@ def parse_args():
...
@@ -76,7 +76,8 @@ def parse_args():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
)
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
)
parser
.
add_argument
(
"--host"
,
type
=
str
,
default
=
"localhost"
)
# Always use 127.0.0.1 as localhost binds to IPv6 which is blocked on CI
parser
.
add_argument
(
"--host"
,
type
=
str
,
default
=
"127.0.0.1"
)
# For prefiller instances
# For prefiller instances
parser
.
add_argument
(
parser
.
add_argument
(
...
...
tests/v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh
0 → 100755
View file @
2ba60ec7
#!/usr/bin/env bash
set -euo pipefail

# Utility to run integration tests sequentially with varying TP configurations.

# Locate the accuracy script next to this file instead of relying on the
# caller's working directory, so the sweep can be launched from anywhere.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly SCRIPT="${SCRIPT_DIR}/run_accuracy_test.sh"

# Define test configurations.
# Each entry is a space-separated list of KEY=VALUE assignments that is placed
# in the environment of a single run of ${SCRIPT}.
readonly -a configs=(
  "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2"
  "GPU_MEMORY_UTILIZATION=0.6 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2"
  "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
  "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
)
#######################################
# Run ${SCRIPT} once per entry of `configs`, stopping at the first failure.
# Globals:
#   configs (read) - array of space-separated KEY=VALUE strings, one per run
#   SCRIPT  (read) - path of the script executed with bash
# Arguments:
#   $1 - label used in the progress messages
#   $2 - optional space-separated KEY=VALUE assignments added to every run
# Outputs:
#   Progress and failure messages on stdout.
# Returns:
#   0 when every configuration passes; exits the shell with status 1 as soon
#   as one run fails.
#######################################
run_tests() {
  local label=$1
  # Default to empty so a one-argument call does not abort under `set -u`.
  local extra_env=${2:-}
  local cfg
  local -a env_vars

  echo "=== Running tests (${label}) ==="
  for cfg in "${configs[@]}"; do
    echo "-> Running with ${cfg} ${extra_env:+and ${extra_env}}"
    # Split the assignments into separate words without eval. Unlike an
    # unquoted expansion, `read -a` does not perform pathname expansion, so a
    # value containing `*` or `?` is passed through literally. The order
    # (extra_env first, then cfg) matches the original command line.
    IFS=$' \t' read -r -a env_vars <<< "${extra_env} ${cfg}"
    # The `+` guard keeps an empty array from tripping `set -u` on older bash.
    if ! env ${env_vars[@]+"${env_vars[@]}"} bash "${SCRIPT}"; then
      echo "❌ Test failed for config: ${cfg} ${extra_env:+(${extra_env})}"
      exit 1
    fi
  done
  echo "✅ All ${label} tests passed!"
}
# Baseline pass: every configuration, with no extra environment on top.
run_tests "default backend" ""

# Optional second pass, enabled by exporting a non-empty FLASHINFER variable.
if [[ -z "${FLASHINFER:-}" ]]; then
  echo "FLASHINFER not set, skipping FLASHINFER runs."
else
  echo "FLASHINFER is set, rerunning with VLLM_ATTENTION_BACKEND=FLASHINFER"
  run_tests "FLASHINFER backend" "VLLM_ATTENTION_BACKEND=FLASHINFER"
fi
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment