Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4fc2b5e8
"vscode:/vscode.git/clone" did not exist on "0abebe388404be84bb38f2c2ed32198635941bcc"
Unverified
Commit
4fc2b5e8
authored
Mar 19, 2026
by
KrishnanPrash
Committed by
GitHub
Mar 19, 2026
Browse files
fix: increase `/dev/shm` for CI pytest containers to fix NIXL UCX segfault (#7515)
parent
967ba9a2
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
15 deletions
+18
-15
.github/actions/pytest/action.yml
.github/actions/pytest/action.yml
+8
-3
tests/serve/test_sglang.py
tests/serve/test_sglang.py
+3
-8
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+7
-4
No files found.
.github/actions/pytest/action.yml
View file @
4fc2b5e8
...
...
@@ -154,6 +154,7 @@ runs:
docker run ${GPU_FLAGS} --rm -w /workspace \
--cpus=${NUM_CPUS} \
--shm-size=200m \
--network host \
"${DOCKER_ENV_FLAGS[@]}" \
--name ${{ env.CONTAINER_ID }}_pytest \
...
...
@@ -189,9 +190,13 @@ runs:
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
# Define common docker flags for stability (Shared memory & limits)
# --ipc=host is critical for parallel pytest workers to communicate fast
DOCKER_OPTS="--ipc=host --ulimit memlock=-1 --ulimit stack=67108864"
# /dev/shm sizing: NIXL uses UCX which allocates shared memory segments in /dev/shm
# via shm_open(). Each NIXL agent with num_threads=8 creates 9 UCX workers, each needing
# ~4.8MB of shm (9 x 4.8MB = ~43MB per agent). Disaggregated tests use up to 3 agents
# (encode+prefill+decode), so 3 x ~43MB = ~130MB in total.
# Docker's default is 64MB, which is insufficient and causes segfaults.
# Do NOT use --ipc=host here — it overrides --shm-size with the host's /dev/shm
# (64MB in K8s pods by default), silently ignoring the size we set.
DOCKER_OPTS="--shm-size=200m --ulimit memlock=-1 --ulimit stack=67108864"
# Determine docker runtime flags and pytest command based on dry_run mode
if [[ "${{ inputs.dry_run }}" == "true" ]]; then
...
...
tests/serve/test_sglang.py
View file @
4fc2b5e8
...
...
@@ -168,9 +168,9 @@ sglang_configs = {
],
),
# NOTE: Pack all workers on 1 GPU for lower CI resource requirements
"multimodal_epd_qwen"
:
SGLangConfig
(
"multimodal_e
_
pd_qwen"
:
SGLangConfig
(
# E/P/D architecture: Encode, Prefill, Decode workers all on GPU 0
name
=
"multimodal_epd_qwen"
,
name
=
"multimodal_e
_
pd_qwen"
,
directory
=
sglang_dir
,
script_name
=
"multimodal_epd.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
...
...
@@ -182,8 +182,6 @@ sglang_configs = {
"DYN_WORKER_GPU"
:
"0"
,
"DYN_ENCODE_GPU_MEM"
:
"0.1"
,
"DYN_WORKER_GPU_MEM"
:
"0.4"
,
# FIXME: NIXL Agent Initialization (shared memory interface) causes segfault
"UCX_TLS"
:
"^mm"
,
},
frontend_port
=
DefaultPort
.
FRONTEND
.
value
,
request_payloads
=
[
...
...
@@ -220,10 +218,7 @@ sglang_configs = {
model
=
"Qwen/Qwen3-VL-2B-Instruct"
,
script_args
=
[
"--model"
,
"Qwen/Qwen3-VL-2B-Instruct"
,
"--single-gpu"
],
timeout
=
360
,
env
=
{
# FIXME: NIXL Agent Initialization (shared memory interface) causes segfault
"UCX_TLS"
:
"^mm"
,
},
env
=
{},
frontend_port
=
DefaultPort
.
FRONTEND
.
value
,
request_payloads
=
[
chat_payload
(
...
...
tests/serve/test_vllm.py
View file @
4fc2b5e8
...
...
@@ -299,8 +299,8 @@ vllm_configs = {
],
),
# NOTE: Pack all workers on 1 GPU for lower CI resource requirements
"multimodal_
disagg_qwen3vl_2b_e_pd
"
:
VLLMConfig
(
name
=
"multimodal_
disagg_qwen3vl_2b_e_pd
"
,
"multimodal_
e_pd_qwen
"
:
VLLMConfig
(
name
=
"multimodal_
e_pd_qwen
"
,
directory
=
vllm_dir
,
script_name
=
"disagg_multimodal_e_pd.sh"
,
marks
=
[
...
...
@@ -311,6 +311,9 @@ vllm_configs = {
],
model
=
"Qwen/Qwen3-VL-2B-Instruct"
,
script_args
=
[
"--model"
,
"Qwen/Qwen3-VL-2B-Instruct"
,
"--single-gpu"
],
env
=
{
"DYN_VLLM_EMBEDDING_TRANSFER_MODE"
:
"nixl-write"
,
},
request_payloads
=
[
chat_payload
(
[
...
...
@@ -376,8 +379,8 @@ vllm_configs = {
# so _PROFILE_PYTEST_VRAM_FRAC_OVERRIDE has no effect. Regardless of GPU_MEM
# fractions (0.1/0.4/0.4), the 3 workers combined consistently use ~17.6 GiB
# total on this GPU.
"multimodal_disagg_qwen
3vl_2b_epd
"
:
VLLMConfig
(
name
=
"multimodal_disagg_qwen
3vl_2b_epd
"
,
"multimodal_disagg_qwen"
:
VLLMConfig
(
name
=
"multimodal_disagg_qwen"
,
directory
=
vllm_dir
,
script_name
=
"disagg_multimodal_epd.sh"
,
marks
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment