Unverified commit 68af77e5, authored by JartX and committed by GitHub
Browse files

[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches (#22896)


Signed-off-by: JartX <sagformas@epdcenter.es>
parent 6b04039a
......@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
out_indices = []
# Reorder the bitmask to match the order of the requests in the batch.
sorted_bitmask = np.zeros_like(grammar_bitmask,
shape=(logits.shape[0],
grammar_bitmask.shape[1]))
sorted_bitmask = np.full(shape=(logits.shape[0],
grammar_bitmask.shape[1]),
fill_value=-1,
dtype=grammar_bitmask.dtype)
cumulative_index = 0
seq = sorted(scheduler_output.structured_output_request_ids.items(),
key=lambda x: x[1])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment