Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4fc2b5e8
Unverified
Commit
4fc2b5e8
authored
Mar 19, 2026
by
KrishnanPrash
Committed by
GitHub
Mar 19, 2026
Browse files
fix: increase `/dev/shm` for CI pytest containers to fix NIXL UCX segfault (#7515)
parent
967ba9a2
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
15 deletions
+18
-15
.github/actions/pytest/action.yml
.github/actions/pytest/action.yml
+8
-3
tests/serve/test_sglang.py
tests/serve/test_sglang.py
+3
-8
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+7
-4
No files found.
.github/actions/pytest/action.yml
View file @
4fc2b5e8
...
...
@@ -154,6 +154,7 @@ runs:
docker run ${GPU_FLAGS} --rm -w /workspace \
--cpus=${NUM_CPUS} \
--shm-size=200m \
--network host \
"${DOCKER_ENV_FLAGS[@]}" \
--name ${{ env.CONTAINER_ID }}_pytest \
...
...
@@ -189,9 +190,13 @@ runs:
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
# Define common docker flags for stability (Shared memory & limits)
# --ipc=host is critical for parallel pytest workers to communicate fast
DOCKER_OPTS="--ipc=host --ulimit memlock=-1 --ulimit stack=67108864"
# /dev/shm sizing: NIXL uses UCX which allocates shared memory segments in /dev/shm
# via shm_open(). Each NIXL agent with num_threads=8 creates 9 UCX workers, each needing
# ~4.8MB of shm (9 x 4.8MB = ~43MB per agent). Disaggregated tests use up to 3 agents
# (encode+prefill+decode), so 3 x ~43MB = ~130MB in total.
# Docker's default is 64MB, which is insufficient and causes segfaults.
# Do NOT use --ipc=host here — it overrides --shm-size with the host's /dev/shm
# (64MB in K8s pods by default), silently ignoring the size we set.
DOCKER_OPTS="--shm-size=200m --ulimit memlock=-1 --ulimit stack=67108864"
# Determine docker runtime flags and pytest command based on dry_run mode
if [[ "${{ inputs.dry_run }}" == "true" ]]; then
...
...
tests/serve/test_sglang.py
View file @
4fc2b5e8
...
...
@@ -168,9 +168,9 @@ sglang_configs = {
],
),
# NOTE: Pack all workers on 1 GPU for lower CI resource requirements
"multimodal_epd_qwen"
:
SGLangConfig
(
"multimodal_e
_
pd_qwen"
:
SGLangConfig
(
# E/P/D architecture: Encode, Prefill, Decode workers all on GPU 0
name
=
"multimodal_epd_qwen"
,
name
=
"multimodal_e
_
pd_qwen"
,
directory
=
sglang_dir
,
script_name
=
"multimodal_epd.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
...
...
@@ -182,8 +182,6 @@ sglang_configs = {
"DYN_WORKER_GPU"
:
"0"
,
"DYN_ENCODE_GPU_MEM"
:
"0.1"
,
"DYN_WORKER_GPU_MEM"
:
"0.4"
,
# FIXME: NIXL Agent Initialization (shared memory interface) causes segfault
"UCX_TLS"
:
"^mm"
,
},
frontend_port
=
DefaultPort
.
FRONTEND
.
value
,
request_payloads
=
[
...
...
@@ -220,10 +218,7 @@ sglang_configs = {
model
=
"Qwen/Qwen3-VL-2B-Instruct"
,
script_args
=
[
"--model"
,
"Qwen/Qwen3-VL-2B-Instruct"
,
"--single-gpu"
],
timeout
=
360
,
env
=
{
# FIXME: NIXL Agent Initialization (shared memory interface) causes segfault
"UCX_TLS"
:
"^mm"
,
},
env
=
{},
frontend_port
=
DefaultPort
.
FRONTEND
.
value
,
request_payloads
=
[
chat_payload
(
...
...
tests/serve/test_vllm.py
View file @
4fc2b5e8
...
...
@@ -299,8 +299,8 @@ vllm_configs = {
],
),
# NOTE: Pack all workers on 1 GPU for lower CI resource requirements
"multimodal_
disagg_qwen3vl_2b_e_pd
"
:
VLLMConfig
(
name
=
"multimodal_
disagg_qwen3vl_2b_e_pd
"
,
"multimodal_
e_pd_qwen
"
:
VLLMConfig
(
name
=
"multimodal_
e_pd_qwen
"
,
directory
=
vllm_dir
,
script_name
=
"disagg_multimodal_e_pd.sh"
,
marks
=
[
...
...
@@ -311,6 +311,9 @@ vllm_configs = {
],
model
=
"Qwen/Qwen3-VL-2B-Instruct"
,
script_args
=
[
"--model"
,
"Qwen/Qwen3-VL-2B-Instruct"
,
"--single-gpu"
],
env
=
{
"DYN_VLLM_EMBEDDING_TRANSFER_MODE"
:
"nixl-write"
,
},
request_payloads
=
[
chat_payload
(
[
...
...
@@ -376,8 +379,8 @@ vllm_configs = {
# so _PROFILE_PYTEST_VRAM_FRAC_OVERRIDE has no effect. Regardless of GPU_MEM
# fractions (0.1/0.4/0.4), the 3 workers combined consistently use ~17.6 GiB
# total on this GPU.
"multimodal_disagg_qwen
3vl_2b_epd
"
:
VLLMConfig
(
name
=
"multimodal_disagg_qwen
3vl_2b_epd
"
,
"multimodal_disagg_qwen"
:
VLLMConfig
(
name
=
"multimodal_disagg_qwen"
,
directory
=
vllm_dir
,
script_name
=
"disagg_multimodal_epd.sh"
,
marks
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment