Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1e1c0678
Unverified
Commit
1e1c0678
authored
Nov 19, 2025
by
Bradley D
Committed by
GitHub
Nov 20, 2025
Browse files
[ci][amd] fix EPLB execution test (#28742)
Signed-off-by:
Bradley Davis
<
bradleyhd@meta.com
>
parent
7218f839
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
213 additions
and
210 deletions
+213
-210
tests/distributed/test_eplb_execute.py
tests/distributed/test_eplb_execute.py
+213
-210
No files found.
tests/distributed/test_eplb_execute.py
View file @
1e1c0678
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
multiprocessing
import
os
import
random
import
pytest
import
torch
import
torch.distributed
import
torch.multiprocessing
as
mp
from
vllm.distributed.eplb.rebalance_execute
import
rearrange_expert_weights_inplace
from
vllm.distributed.parallel_state
import
(
...
...
@@ -17,10 +17,12 @@ from vllm.distributed.parallel_state import (
)
from
vllm.utils.system_utils
import
update_environment_variables
mp
.
set_start_method
(
"spawn"
,
force
=
True
)
def
distributed_run
(
fn
,
world_size
):
def
distributed_run
(
fn
,
world_size
,
*
args
):
number_of_processes
=
world_size
processes
:
list
[
m
ultiprocessing
.
Process
]
=
[]
processes
:
list
[
m
p
.
Process
]
=
[]
for
i
in
range
(
number_of_processes
):
env
:
dict
[
str
,
str
]
=
{}
env
[
"RANK"
]
=
str
(
i
)
...
...
@@ -29,7 +31,7 @@ def distributed_run(fn, world_size):
env
[
"LOCAL_WORLD_SIZE"
]
=
str
(
number_of_processes
)
env
[
"MASTER_ADDR"
]
=
"localhost"
env
[
"MASTER_PORT"
]
=
"12345"
p
=
m
ultiprocessing
.
Process
(
target
=
fn
,
args
=
(
env
,))
p
=
m
p
.
Process
(
target
=
fn
,
args
=
(
env
,
world_size
,
*
args
))
processes
.
append
(
p
)
p
.
start
()
...
...
@@ -40,11 +42,7 @@ def distributed_run(fn, world_size):
assert
p
.
exitcode
==
0
def
worker_fn_wrapper
(
fn
):
# `multiprocessing.Process` cannot accept environment variables directly
# so we need to pass the environment variables as arguments
# and update the environment variables in the function
def
wrapped_fn
(
env
):
def
set_env_vars_and_device
(
env
:
dict
[
str
,
str
])
->
None
:
update_environment_variables
(
env
)
local_rank
=
os
.
environ
[
"LOCAL_RANK"
]
device
=
torch
.
device
(
f
"cuda:
{
local_rank
}
"
)
...
...
@@ -55,10 +53,6 @@ def worker_fn_wrapper(fn):
random
.
seed
(
42
)
torch
.
manual_seed
(
42
)
fn
()
return
wrapped_fn
def
create_expert_indices_with_redundancy
(
num_layers
:
int
,
...
...
@@ -275,41 +269,12 @@ def verify_redundant_experts_have_same_weights(
)
@
pytest
.
mark
.
parametrize
(
"world_size,num_layers,num_local_experts,num_logical_experts"
,
[
# 2 GPU, 2 experts per GPU
# 3 logical experts, 4 physical experts, 1 redundant experts
(
2
,
1
,
2
,
3
),
# 2 GPU, 3 experts per GPU
# 4 logical experts, 6 physical experts, 2 redundant experts
(
2
,
2
,
3
,
4
),
# 2 GPU, 8 experts per GPU
# 16 logical experts, 16 physical experts, 0 redundant experts
(
2
,
4
,
8
,
16
),
# 4 GPU, 2 experts per GPU
# 6 logical experts, 8 physical experts, 2 redundant experts
(
4
,
1
,
2
,
6
),
# 4 GPU, 2 experts per GPU
# 5 logical experts, 8 physical experts, 3 redundant experts
(
4
,
2
,
2
,
5
),
# 4 GPU, 8 experts per GPU
# 16 logical experts, 32 physical experts, 16 redundant experts
(
4
,
8
,
8
,
16
),
],
)
def
test_rearrange_expert_weights_with_redundancy
(
world_size
,
num_layers
,
num_local_experts
,
num_logical_experts
):
"""Test the functionality of rearranging expert weights with redundancy."""
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
@
worker_fn_wrapper
def
worker_fn
():
def
_test_rearrange_expert_weights_with_redundancy
(
env
,
world_size
,
num_layers
,
num_local_experts
,
num_logical_experts
)
->
None
:
# Initialize model parallel (using tensor parallel as an entrypoint
# to expert parallel)
set_env_vars_and_device
(
env
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
...
...
@@ -376,21 +341,48 @@ def test_rearrange_expert_weights_with_redundancy(
num_local_experts
,
)
distributed_run
(
worker_fn
,
world_size
)
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
def
test_rearrange_expert_weights_no_change
(
world_size
):
"""
Test that when the indices do not change, the weights should remain
unchanged.
"""
@
pytest
.
mark
.
parametrize
(
"world_size,num_layers,num_local_experts,num_logical_experts"
,
[
# 2 GPU, 2 experts per GPU
# 3 logical experts, 4 physical experts, 1 redundant experts
(
2
,
1
,
2
,
3
),
# 2 GPU, 3 experts per GPU
# 4 logical experts, 6 physical experts, 2 redundant experts
(
2
,
2
,
3
,
4
),
# 2 GPU, 8 experts per GPU
# 16 logical experts, 16 physical experts, 0 redundant experts
(
2
,
4
,
8
,
16
),
# 4 GPU, 2 experts per GPU
# 6 logical experts, 8 physical experts, 2 redundant experts
(
4
,
1
,
2
,
6
),
# 4 GPU, 2 experts per GPU
# 5 logical experts, 8 physical experts, 3 redundant experts
(
4
,
2
,
2
,
5
),
# 4 GPU, 8 experts per GPU
# 16 logical experts, 32 physical experts, 16 redundant experts
(
4
,
8
,
8
,
16
),
],
)
def
test_rearrange_expert_weights_with_redundancy
(
world_size
,
num_layers
,
num_local_experts
,
num_logical_experts
):
"""Test the functionality of rearranging expert weights with redundancy."""
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
_test_rearrange_expert_weights_with_redundancy
,
world_size
,
num_layers
,
num_local_experts
,
num_logical_experts
,
)
@
worker_fn_wrapper
def
worker_fn
():
def
_test_rearrange_expert_weights_no_change
(
env
,
world_size
)
->
None
:
set_env_vars_and_device
(
env
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
...
...
@@ -440,21 +432,25 @@ def test_rearrange_expert_weights_no_change(world_size):
torch
.
testing
.
assert_close
(
expert_weights
[
layer
][
weight_idx
],
original_weights
[
layer
][
weight_idx
],
msg
=
f
"Layer
{
layer
}
, weight
{
weight_idx
}
should remain unchanged"
,
msg
=
f
"""Layer
{
layer
}
, weight
{
weight_idx
}
should remain unchanged"""
,
)
distributed_run
(
worker_fn
,
world_size
)
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
def
test_rearrange_expert_weights_profile_mode
(
world_size
):
"""Test profile mode (should not copy actual weights)"""
def
test_rearrange_expert_weights_no_change
(
world_size
):
"""
Test that when the indices do not change, the weights should remain
unchanged.
"""
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
_test_rearrange_expert_weights_no_change
,
world_size
)
@
worker_fn_wrapper
def
worker_fn
():
def
_test_rearrange_expert_weights_profile_mode
(
env
,
world_size
)
->
None
:
set_env_vars_and_device
(
env
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
...
...
@@ -514,4 +510,11 @@ def test_rearrange_expert_weights_profile_mode(world_size):
msg
=
"In profile mode, the weights should remain unchanged"
,
)
distributed_run
(
worker_fn
,
world_size
)
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
def
test_rearrange_expert_weights_profile_mode
(
world_size
):
"""Test profile mode (should not copy actual weights)"""
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
_test_rearrange_expert_weights_profile_mode
,
world_size
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment