[EPLB][Cleanup] Remove `is_async_enabled` from `EplbModelState` (#32050)

Signed-off-by: Sage Moore <sage@neuralmagic.com>

[EPLB][Cleanup] Remove `is_async_enabled` from `EplbModelState` (#32050)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
6beef12b · Sage Moore · GitHub · ab74b2a2 · 6beef12b · 6beef12b
Unverified Commit 6beef12b authored Jan 13, 2026 by Sage Moore Committed by GitHub Jan 13, 2026
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 20 deletions

vllm/distributed/eplb/async_worker.py vllm/distributed/eplb/async_worker.py +4 -4

vllm/distributed/eplb/eplb_state.py vllm/distributed/eplb/eplb_state.py +4 -16

No files found.
--- a/vllm/distributed/eplb/async_worker.py
+++ b/vllm/distributed/eplb/async_worker.py
@@ -30,6 +30,7 @@ def start_async_worker(
    ep_group = get_ep_group().device_group
    rank = ep_group.rank()
    device_index = state.cuda_device_index
+    assert state.is_async
    def thread_target() -> None:
        assert device_index is not None
@@ -42,9 +43,9 @@ def start_async_worker(
                transfer_run_periodically(
                    state=state,
                    ep_group=ep_group,
+                    cuda_stream=cuda_stream,
                    is_profile=is_profile,
                    rank_mapping=rank_mapping,
-                    cuda_stream=cuda_stream,
                )
            )
        except Exception as exc:  # pragma: no cover - diagnostic path
@@ -60,17 +61,16 @@ def start_async_worker(
 async def transfer_run_periodically(
    state: "EplbState",
    ep_group: ProcessGroup,
+    cuda_stream: torch.cuda.Stream,
    is_profile: bool = False,
    rank_mapping: dict[int, int] | None = None,
-    cuda_stream: torch.cuda.Stream = None,
 ) -> None:
    while True:
        await asyncio.to_thread(state.rearrange_event.wait)
        logger.info("async worker woke up for EPLB transfer")
+        assert state.is_async
        for model_state in state.model_states.values():
-            if not model_state.is_async_enabled:
-                continue
            current_num_layers = model_state.model.num_moe_layers
            while (
                model_state.rebalanced

--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -182,10 +182,6 @@ class EplbModelState:
    """
    intermediate variable between `move_to_buffer` and `move_to_workspace`.
    """
-    is_async_enabled: bool
-    """
-    The flag indicates whether the EPLB is running in async mode.
-    """
    cuda_device_index: int | None
    """
    CUDA device index for the async EPLB worker thread.
@@ -518,7 +514,6 @@ class EplbState:
                recv_expert_ids=np.array([]),
                recv_dst_rows=np.array([]),
            ),
-            is_async_enabled=self.is_async,
            cuda_device_index=self.cuda_device_index,
            new_physical_to_logical_map=new_physical_to_logical_map,
            new_logical_to_physical_map=new_logical_to_physical_map,
@@ -630,19 +625,12 @@ class EplbState:
        if self.is_async:
            for eplb_model_state in self.model_states.values():
-                if not eplb_model_state.is_async_enabled:
-                    continue
                all_ranks_buffer_ready = False
                if eplb_model_state.pending_global_ready_check:
                    all_ranks_buffer_ready = self._all_ranks_buffer_ready(
                        eplb_model_state
                    )
-                if (
+                if eplb_model_state.ep_buffer_ready and all_ranks_buffer_ready:
-                    eplb_model_state.is_async_enabled
-                    and eplb_model_state.ep_buffer_ready
-                    and all_ranks_buffer_ready
-                ):
                    self.move_to_workspace(
                        model_state=eplb_model_state,
                        ep_group=ep_group,
@@ -664,8 +652,8 @@ class EplbState:
                        )
        if self.expert_rearrangement_step >= self.expert_rearrangement_step_interval:
-            if any(
+            if self.is_async and any(
-                eplb_model_state.is_async_enabled and eplb_model_state.rebalanced
+                eplb_model_state.rebalanced
                for eplb_model_state in self.model_states.values()
            ):
                # Still performing asynchronous rearrangement
@@ -822,7 +810,7 @@ class EplbState:
                eplb_model_state.physical_to_logical_map,
            )
-            if not eplb_model_state.is_async_enabled or is_profile:
+            if not self.is_async or is_profile:
                # Update expert weights
                rearrange_expert_weights_inplace(
                    eplb_model_state.physical_to_logical_map,