Unverified commit 4e94951b, authored by Michal Adamczyk, committed by GitHub
Browse files

[BUGFIX] Move scores to float32 in case of running xgrammar on cpu (#12152)


Signed-off-by: Michal Adamczyk <madamczyk@habana.ai>
parent 7a8a48d5
......@@ -298,8 +298,11 @@ class XGrammarLogitsProcessor:
# token_bitmask is a CPU tensor for use with accept_token and
# fill_next_token_bitmask so we move it to the device of scores
device_type = scores.device.type
dtype = scores.dtype
if device_type != "cuda":
scores = scores.to("cpu").unsqueeze(0)
# xgrammar on cpu only supports float32 scores
# see: https://github.com/mlc-ai/xgrammar/blob/c1b64920cad24f44f235778c1c00bb52d57da01a/python/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py#L22
scores = scores.to("cpu").float().unsqueeze(0)
# Note: In this method, if the tensors have different dimensions
# on CPU device fails, but on GPU it runs without error. Hence the
......@@ -307,7 +310,7 @@ class XGrammarLogitsProcessor:
xgr.apply_token_bitmask_inplace(scores,
self.token_bitmask.to(scores.device))
if device_type != "cuda":
scores = scores.to(device_type).squeeze()
scores = scores.to(dtype).to(device_type).squeeze()
return scores
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment