Unverified commit 4a05bdfa, authored by Lianmin Zheng, committed by GitHub
Browse files

Revert "Check eagle server args" (#4242)

parent eb06dbcb
......@@ -284,13 +284,9 @@ class ServerArgs:
"Overlap scheduler are disabled because of using "
"eagle speculative decoding."
)
# The token generated from the verify step is counted in speculative_num_draft_tokens.
# The token generated from the verify step is counted.
# If speculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded.
assert self.speculative_num_steps < self.speculative_num_draft_tokens
assert (
self.speculative_num_draft_tokens - 1
<= self.speculative_num_steps * self.speculative_eagle_topk
)
# assert self.speculative_num_steps < self.speculative_num_draft_tokens
# GGUF
if (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment