Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1e1c0678
Unverified
Commit
1e1c0678
authored
Nov 19, 2025
by
Bradley D
Committed by
GitHub
Nov 20, 2025
Browse files
[ci][amd] fix EPLB execution test (#28742)
Signed-off-by:
Bradley Davis
<
bradleyhd@meta.com
>
parent
7218f839
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
213 additions
and
210 deletions
+213
-210
tests/distributed/test_eplb_execute.py
tests/distributed/test_eplb_execute.py
+213
-210
No files found.
tests/distributed/test_eplb_execute.py
View file @
1e1c0678
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
multiprocessing
import
os
import
os
import
random
import
random
import
pytest
import
pytest
import
torch
import
torch
import
torch.distributed
import
torch.distributed
import
torch.multiprocessing
as
mp
from
vllm.distributed.eplb.rebalance_execute
import
rearrange_expert_weights_inplace
from
vllm.distributed.eplb.rebalance_execute
import
rearrange_expert_weights_inplace
from
vllm.distributed.parallel_state
import
(
from
vllm.distributed.parallel_state
import
(
...
@@ -17,10 +17,12 @@ from vllm.distributed.parallel_state import (
...
@@ -17,10 +17,12 @@ from vllm.distributed.parallel_state import (
)
)
from
vllm.utils.system_utils
import
update_environment_variables
from
vllm.utils.system_utils
import
update_environment_variables
mp
.
set_start_method
(
"spawn"
,
force
=
True
)
def
distributed_run
(
fn
,
world_size
):
def
distributed_run
(
fn
,
world_size
,
*
args
):
number_of_processes
=
world_size
number_of_processes
=
world_size
processes
:
list
[
m
ultiprocessing
.
Process
]
=
[]
processes
:
list
[
m
p
.
Process
]
=
[]
for
i
in
range
(
number_of_processes
):
for
i
in
range
(
number_of_processes
):
env
:
dict
[
str
,
str
]
=
{}
env
:
dict
[
str
,
str
]
=
{}
env
[
"RANK"
]
=
str
(
i
)
env
[
"RANK"
]
=
str
(
i
)
...
@@ -29,7 +31,7 @@ def distributed_run(fn, world_size):
...
@@ -29,7 +31,7 @@ def distributed_run(fn, world_size):
env
[
"LOCAL_WORLD_SIZE"
]
=
str
(
number_of_processes
)
env
[
"LOCAL_WORLD_SIZE"
]
=
str
(
number_of_processes
)
env
[
"MASTER_ADDR"
]
=
"localhost"
env
[
"MASTER_ADDR"
]
=
"localhost"
env
[
"MASTER_PORT"
]
=
"12345"
env
[
"MASTER_PORT"
]
=
"12345"
p
=
m
ultiprocessing
.
Process
(
target
=
fn
,
args
=
(
env
,))
p
=
m
p
.
Process
(
target
=
fn
,
args
=
(
env
,
world_size
,
*
args
))
processes
.
append
(
p
)
processes
.
append
(
p
)
p
.
start
()
p
.
start
()
...
@@ -40,24 +42,16 @@ def distributed_run(fn, world_size):
...
@@ -40,24 +42,16 @@ def distributed_run(fn, world_size):
assert
p
.
exitcode
==
0
assert
p
.
exitcode
==
0
def
worker_fn_wrapper
(
fn
):
def
set_env_vars_and_device
(
env
:
dict
[
str
,
str
])
->
None
:
# `multiprocessing.Process` cannot accept environment variables directly
update_environment_variables
(
env
)
# so we need to pass the environment variables as arguments
local_rank
=
os
.
environ
[
"LOCAL_RANK"
]
# and update the environment variables in the function
device
=
torch
.
device
(
f
"cuda:
{
local_rank
}
"
)
def
wrapped_fn
(
env
):
torch
.
cuda
.
set_device
(
device
)
update_environment_variables
(
env
)
init_distributed_environment
()
local_rank
=
os
.
environ
[
"LOCAL_RANK"
]
device
=
torch
.
device
(
f
"cuda:
{
local_rank
}
"
)
torch
.
cuda
.
set_device
(
device
)
init_distributed_environment
()
# Ensure each worker process has the same random seed
random
.
seed
(
42
)
torch
.
manual_seed
(
42
)
fn
()
return
wrapped_fn
# Ensure each worker process has the same random seed
random
.
seed
(
42
)
torch
.
manual_seed
(
42
)
def
create_expert_indices_with_redundancy
(
def
create_expert_indices_with_redundancy
(
...
@@ -275,6 +269,79 @@ def verify_redundant_experts_have_same_weights(
...
@@ -275,6 +269,79 @@ def verify_redundant_experts_have_same_weights(
)
)
def
_test_rearrange_expert_weights_with_redundancy
(
env
,
world_size
,
num_layers
,
num_local_experts
,
num_logical_experts
)
->
None
:
# Initialize model parallel (using tensor parallel as an entrypoint
# to expert parallel)
set_env_vars_and_device
(
env
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
ep_group
=
get_tp_group
().
cpu_group
ep_rank
=
torch
.
distributed
.
get_rank
()
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
# Test parameters
total_physical_experts
=
world_size
*
num_local_experts
hidden_sizes
=
[
32
,
64
]
# Two different weight matrices
# Create old expert indices (with redundancy)
redundancy_config
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
old_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
redundancy_config
,
)
# Create new expert indices (with redundancy)
new_redundancy_config
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
new_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
new_redundancy_config
,
)
# Create expert weights
expert_weights
=
create_expert_weights
(
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
old_indices
)
# Execute weight rearrangement
rearrange_expert_weights_inplace
(
old_indices
,
new_indices
,
expert_weights
,
ep_group
,
is_profile
=
False
,
)
# Verify the rearrangement result
verify_expert_weights_after_shuffle
(
expert_weights
,
new_indices
,
hidden_sizes
,
ep_rank
,
num_local_experts
,
)
verify_redundant_experts_have_same_weights
(
expert_weights
,
new_indices
,
hidden_sizes
,
world_size
,
num_local_experts
,
)
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"world_size,num_layers,num_local_experts,num_logical_experts"
,
"world_size,num_layers,num_local_experts,num_logical_experts"
,
[
[
...
@@ -305,78 +372,69 @@ def test_rearrange_expert_weights_with_redundancy(
...
@@ -305,78 +372,69 @@ def test_rearrange_expert_weights_with_redundancy(
if
torch
.
cuda
.
device_count
()
<
world_size
:
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
@
worker_fn_wrapper
_test_rearrange_expert_weights_with_redundancy
,
def
worker_fn
():
world_size
,
# Initialize model parallel (using tensor parallel as an entrypoint
num_layers
,
# to expert parallel)
num_local_experts
,
ensure_model_parallel_initialized
(
num_logical_experts
,
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
)
ep_group
=
get_tp_group
().
cpu_group
def
_test_rearrange_expert_weights_no_change
(
env
,
world_size
)
->
None
:
ep_rank
=
torch
.
distributed
.
get_rank
()
set_env_vars_and_device
(
env
)
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
# Test parameters
)
total_physical_experts
=
world_size
*
num_local_experts
hidden_sizes
=
[
32
,
64
]
# Two different weight matrices
ep_group
=
get_tp_group
().
cpu_group
ep_rank
=
torch
.
distributed
.
get_rank
()
# Create old expert indices (with redundancy)
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
redundancy_config
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
num_layers
=
2
)
num_local_experts
=
2
total_physical_experts
=
world_size
*
num_local_experts
old_indices
=
create_expert_indices_with_redundancy
(
num_logical_experts
=
total_physical_experts
//
2
# Some redundancy
num_layers
,
hidden_sizes
=
[
32
,
64
]
num_logical_experts
,
total_physical_experts
,
# Create redundancy configuration
redundancy_config
,
redundancy_config
=
[
2
]
*
num_logical_experts
)
# Same indices - no change
# Create new expert indices (with redundancy)
indices
=
create_expert_indices_with_redundancy
(
new_redundancy_config
=
create_redundancy_config
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
redundancy_config
num_logical_experts
,
total_physical_experts
)
)
new_indices
=
create_expert_indices_with_redundancy
(
expert_weights
=
create_expert_weights
(
num_layers
,
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
indices
num_logical_experts
,
)
total_physical_experts
,
new_redundancy_config
,
# Save original weights
)
original_weights
=
[]
for
layer_weights
in
expert_weights
:
# Create expert weights
layer_copy
=
[]
expert_weights
=
create_expert_weights
(
for
weight
in
layer_weights
:
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
old_indices
layer_copy
.
append
(
weight
.
clone
())
)
original_weights
.
append
(
layer_copy
)
# Execute weight rearrangement
# Execute rearrangement (should be no change)
rearrange_expert_weights_inplace
(
rearrange_expert_weights_inplace
(
old_indices
,
indices
,
new_indices
,
indices
,
# Same indices
expert_weights
,
expert_weights
,
ep_group
,
ep_group
,
is_profile
=
False
,
is_profile
=
False
,
)
)
# Verify the rearrangement result
# Verify that the weights have not changed
verify_expert_weights_after_shuffle
(
for
layer
in
range
(
num_layers
):
expert_weights
,
for
weight_idx
in
range
(
len
(
hidden_sizes
)):
new_indices
,
torch
.
testing
.
assert_close
(
hidden_sizes
,
expert_weights
[
layer
][
weight_idx
],
ep_rank
,
original_weights
[
layer
][
weight_idx
],
num_local_experts
,
msg
=
f
"""Layer
{
layer
}
, weight
{
weight_idx
}
)
should remain unchanged"""
,
)
verify_redundant_experts_have_same_weights
(
expert_weights
,
new_indices
,
hidden_sizes
,
world_size
,
num_local_experts
,
)
distributed_run
(
worker_fn
,
world_size
)
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
...
@@ -388,62 +446,69 @@ def test_rearrange_expert_weights_no_change(world_size):
...
@@ -388,62 +446,69 @@ def test_rearrange_expert_weights_no_change(world_size):
if
torch
.
cuda
.
device_count
()
<
world_size
:
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
_test_rearrange_expert_weights_no_change
,
world_size
)
@
worker_fn_wrapper
def
worker_fn
():
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
ep_group
=
get_tp_group
().
cpu_group
ep_rank
=
torch
.
distributed
.
get_rank
()
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
num_layers
=
2
num_local_experts
=
2
total_physical_experts
=
world_size
*
num_local_experts
num_logical_experts
=
total_physical_experts
//
2
# Some redundancy
hidden_sizes
=
[
32
,
64
]
# Create redundancy configuration
redundancy_config
=
[
2
]
*
num_logical_experts
# Same indices - no change
indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
redundancy_config
)
expert_weights
=
create_expert_weights
(
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
indices
)
# Save original weights
original_weights
=
[]
for
layer_weights
in
expert_weights
:
layer_copy
=
[]
for
weight
in
layer_weights
:
layer_copy
.
append
(
weight
.
clone
())
original_weights
.
append
(
layer_copy
)
# Execute rearrangement (should be no change)
rearrange_expert_weights_inplace
(
indices
,
indices
,
# Same indices
expert_weights
,
ep_group
,
is_profile
=
False
,
)
# Verify that the weights have not changed
for
layer
in
range
(
num_layers
):
for
weight_idx
in
range
(
len
(
hidden_sizes
)):
torch
.
testing
.
assert_close
(
expert_weights
[
layer
][
weight_idx
],
original_weights
[
layer
][
weight_idx
],
msg
=
f
"Layer
{
layer
}
, weight
{
weight_idx
}
should remain unchanged"
,
)
distributed_run
(
worker_fn
,
world_size
)
def
_test_rearrange_expert_weights_profile_mode
(
env
,
world_size
)
->
None
:
set_env_vars_and_device
(
env
)
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
ep_group
=
get_tp_group
().
cpu_group
ep_rank
=
torch
.
distributed
.
get_rank
()
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
num_layers
=
1
num_local_experts
=
2
total_physical_experts
=
world_size
*
num_local_experts
num_logical_experts
=
total_physical_experts
//
2
hidden_sizes
=
[
32
]
# Create different index distributions
old_redundancy
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
new_redundancy
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
old_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
old_redundancy
)
new_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
new_redundancy
)
expert_weights
=
create_expert_weights
(
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
old_indices
)
# Save original weights
original_weights
=
[]
for
layer_weights
in
expert_weights
:
layer_copy
=
[]
for
weight
in
layer_weights
:
layer_copy
.
append
(
weight
.
clone
())
original_weights
.
append
(
layer_copy
)
# Execute profile mode rearrangement
rearrange_expert_weights_inplace
(
old_indices
,
new_indices
,
expert_weights
,
ep_group
,
is_profile
=
True
,
# Profile mode
)
# In profile mode, the weights should remain unchanged
for
layer
in
range
(
num_layers
):
for
weight_idx
in
range
(
len
(
hidden_sizes
)):
torch
.
testing
.
assert_close
(
expert_weights
[
layer
][
weight_idx
],
original_weights
[
layer
][
weight_idx
],
msg
=
"In profile mode, the weights should remain unchanged"
,
)
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"world_size"
,
[
2
,
4
])
...
@@ -452,66 +517,4 @@ def test_rearrange_expert_weights_profile_mode(world_size):
...
@@ -452,66 +517,4 @@ def test_rearrange_expert_weights_profile_mode(world_size):
if
torch
.
cuda
.
device_count
()
<
world_size
:
if
torch
.
cuda
.
device_count
()
<
world_size
:
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
pytest
.
skip
(
f
"Need at least
{
world_size
}
GPUs to run the test"
)
distributed_run
(
_test_rearrange_expert_weights_profile_mode
,
world_size
)
@
worker_fn_wrapper
def
worker_fn
():
ensure_model_parallel_initialized
(
tensor_model_parallel_size
=
world_size
,
pipeline_model_parallel_size
=
1
)
ep_group
=
get_tp_group
().
cpu_group
ep_rank
=
torch
.
distributed
.
get_rank
()
device
=
torch
.
device
(
f
"cuda:
{
ep_rank
}
"
)
num_layers
=
1
num_local_experts
=
2
total_physical_experts
=
world_size
*
num_local_experts
num_logical_experts
=
total_physical_experts
//
2
hidden_sizes
=
[
32
]
# Create different index distributions
old_redundancy
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
new_redundancy
=
create_redundancy_config
(
num_logical_experts
,
total_physical_experts
)
old_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
old_redundancy
)
new_indices
=
create_expert_indices_with_redundancy
(
num_layers
,
num_logical_experts
,
total_physical_experts
,
new_redundancy
)
expert_weights
=
create_expert_weights
(
num_layers
,
num_local_experts
,
hidden_sizes
,
ep_rank
,
device
,
old_indices
)
# Save original weights
original_weights
=
[]
for
layer_weights
in
expert_weights
:
layer_copy
=
[]
for
weight
in
layer_weights
:
layer_copy
.
append
(
weight
.
clone
())
original_weights
.
append
(
layer_copy
)
# Execute profile mode rearrangement
rearrange_expert_weights_inplace
(
old_indices
,
new_indices
,
expert_weights
,
ep_group
,
is_profile
=
True
,
# Profile mode
)
# In profile mode, the weights should remain unchanged
for
layer
in
range
(
num_layers
):
for
weight_idx
in
range
(
len
(
hidden_sizes
)):
torch
.
testing
.
assert_close
(
expert_weights
[
layer
][
weight_idx
],
original_weights
[
layer
][
weight_idx
],
msg
=
"In profile mode, the weights should remain unchanged"
,
)
distributed_run
(
worker_fn
,
world_size
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment