[BUG] Fix async rlhf tests (#35811)

Signed-off-by: ahao-anyscale <ahao@anyscale.com>

[BUG] Fix async rlhf tests (#35811)
Signed-off-by: ahao-anyscale <ahao@anyscale.com>
d6b61e51 · Aaron Hao · GitHub · cf632499 · d6b61e51 · d6b61e51
Unverified commit d6b61e51, authored Mar 11, 2026 by Aaron Hao; committed by GitHub on Mar 11, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

.buildkite/test_areas/distributed.yaml .buildkite/test_areas/distributed.yaml +1 -1

vllm/v1/worker/gpu_worker.py vllm/v1/worker/gpu_worker.py +4 -0

No files found.
--- a/.buildkite/test_areas/distributed.yaml
+++ b/.buildkite/test_areas/distributed.yaml
@@ -149,7 +149,7 @@ steps:
  num_devices: 2
  commands:
    - pytest -v -s tests/distributed/test_context_parallel.py
-    # - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py --- failing, need to re-enable
+    - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py
    - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
    - pytest -v -s tests/v1/distributed/test_dbo.py

--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -1006,6 +1006,10 @@ class Worker(WorkerBase):
                load_weights=load_weights_direct,
            )
+        # The NCCL broadcast/packed paths are asynchronous.
+        # Synchronize here so the next step uses the new weights.
+        torch.accelerator.synchronize()
    def shutdown(self) -> None:
        # has_kv_transfer_group can be None during interpreter shutdown.
        if ensure_kv_transfer_shutdown is not None: