[RLHF] Fix torch.dtype not serializable in example (#22158)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>

[RLHF] Fix torch.dtype not serializable in example (#22158)
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
845420ac · 22quinn · GitHub · e27d25a0 · 845420ac · 845420ac
Unverified commit 845420ac, authored Aug 03, 2025 by 22quinn; committed by GitHub Aug 04, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 6 additions and 2 deletions

examples/offline_inference/rlhf.py +4 -1

examples/offline_inference/rlhf_utils.py +2 -1

No files found.
--- a/examples/offline_inference/rlhf.py
+++ b/examples/offline_inference/rlhf.py
@@ -126,7 +126,10 @@ for name, p in train_model.named_parameters():

 # Synchronize the updated weights to the inference engine.
 for name, p in train_model.named_parameters():
-    handle = llm.collective_rpc.remote("update_weight", args=(name, p.dtype, p.shape))
+    dtype_name = str(p.dtype).split(".")[-1]
+    handle = llm.collective_rpc.remote(
+        "update_weight", args=(name, dtype_name, p.shape)
+    )
    model_update_group.broadcast(p, src=0, stream=torch.cuda.current_stream())
    ray.get(handle)


--- a/examples/offline_inference/rlhf_utils.py
+++ b/examples/offline_inference/rlhf_utils.py
@@ -45,7 +45,8 @@ class WorkerExtension:
            self.device,
        )

-    def update_weight(self, name, dtype, shape):
+    def update_weight(self, name, dtype_name, shape):
+        dtype = getattr(torch, dtype_name)
        weight = torch.empty(shape, dtype=dtype, device="cuda")
        self.model_update_group.broadcast(
            weight, src=0, stream=torch.cuda.current_stream()