Unverified Commit 928e8bb6 authored by Baber Abbasi, committed by GitHub

hotfix #2262 (#2264)

* max_length - 1 (generation always >= 1)

* vllm: fix rolling prefix_token

* nit: add comment

* fixup! max_length should be handled for loglikelihoods

* Revert "fixup! max_length should be handled for logliklihoods"

This reverts commit 432d1a3b754c117c3a54ea2fe792ab3a1bd09ed3.
parent b31f92e8
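The reasoning behind the first change, shown in the diff below: at least one token is always reserved for generation, so the usable context budget is max_length - 1, and subtracting once at construction time keeps every later use of self.max_length consistent. A minimal sketch of that reservation, assuming a hypothetical truncate_context helper that is not part of the harness:

    def truncate_context(context_tokens: list[int], max_length: int) -> list[int]:
        # Reserve one slot of the window for generation, then keep only
        # the most recent tokens that fit in the remaining budget.
        budget = max_length - 1
        return context_tokens[-budget:]

    # Example: with a 10-token window, at most 9 prompt tokens survive.
    assert len(truncate_context(list(range(50)), max_length=10)) == 9

Baking the - 1 into self.max_length once means call sites no longer repeat the subtraction, which is exactly what the second hunk cleans up.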
@@ -104,8 +104,9 @@ class TemplateAPI(TemplateLM):
         self._truncate = truncate
         self._max_gen_toks = int(max_gen_toks)
         self._seed = int(seed)
-        eval_logger.info(f"Using max length {max_length}")
-        self.max_length = max_length
+        # max_length - 1 as we always have 1 token for generation
+        eval_logger.info(f"Using max length {max_length} - 1")
+        self.max_length = max_length - 1
         if int(num_concurrent) <= 1:
             eval_logger.info(
                 "Concurrent requests are disabled. To enable concurrent requests, set `num_concurrent` > 1."
@@ -419,9 +420,9 @@ class TemplateAPI(TemplateLM):
         for chunk in chunks:
             for cache_key, context_enc, continuation_enc in chunk:
                 # max_length - 1 as we always have 1 token for generation
-                inp = (context_enc + continuation_enc)[-(self.max_length - 1) :]
+                inp = (context_enc + continuation_enc)[-(self.max_length) :]
                 ctxlen = len(context_enc) - max(
-                    0, len(context_enc) + len(continuation_enc) - (self.max_length - 1)
+                    0, len(context_enc) + len(continuation_enc) - (self.max_length)
                 )
                 inputs.append(inp)
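The second hunk removes the inline - 1 at the call site: self.max_length already carries the reduced budget after __init__, so subtracting again would shrink the window to max_length - 2 and over-truncate loglikelihood inputs. A self-contained sketch of the truncation arithmetic with made-up token lists (real inputs come from the tokenizer); ctxlen marks where the scored continuation starts inside the truncated sequence:

    context_enc = list(range(8))              # 8 context tokens
    continuation_enc = list(range(100, 104))  # 4 continuation tokens
    max_length = 10                           # already reduced by 1 in __init__

    # Keep the most recent max_length tokens of context + continuation.
    inp = (context_enc + continuation_enc)[-max_length:]
    # Count how many context tokens survived left-truncation.
    ctxlen = len(context_enc) - max(
        0, len(context_enc) + len(continuation_enc) - max_length
    )

    assert len(inp) == 10
    assert ctxlen == 6                        # two context tokens cut from the left
    assert inp[ctxlen:] == continuation_enc   # continuation is kept intact

Truncating from the left always sacrifices context tokens first, so the continuation being scored is never cut.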