hotfix #2262 (#2264)

* max_length - 1 (generation always >= 1) * vllm: fix rolling prefix_token * nit: add comment * fixup! max_length should be handled for loglikelihoods * Revert "fixup! max_length should be handled for loglikelihoods" This reverts commit 432d1a3b754c117c3a54ea2fe792ab3a1bd09ed3.

hotfix #2262 (#2264)
* max_length - 1 (generation always >= 1) * vllm: fix rolling prefix_token * nit: add comment * fixup! max_length should be handled for loglikelihoods * Revert "fixup! max_length should be handled for loglikelihoods" This reverts commit 432d1a3b754c117c3a54ea2fe792ab3a1bd09ed3.
928e8bb6 · Baber Abbasi · GitHub · b31f92e8 · 928e8bb6
Unverified Commit 928e8bb6 authored Aug 30, 2024 by Baber Abbasi Committed by GitHub Aug 30, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 4 deletions

lm_eval/models/api_models.py lm_eval/models/api_models.py +5 -4

No files found.
--- a/lm_eval/models/api_models.py
+++ b/lm_eval/models/api_models.py
@@ -104,8 +104,9 @@ class TemplateAPI(TemplateLM):
        self._truncate = truncate
        self._max_gen_toks = int(max_gen_toks)
        self._seed = int(seed)
-        eval_logger.info(f"Using max length {max_length}")
-        self.max_length = max_length
+        # max_length - 1 as we always have 1 token for generation
+        eval_logger.info(f"Using max length {max_length} - 1")
+        self.max_length = max_length - 1
        if int(num_concurrent) <= 1:
            eval_logger.info(
                "Concurrent requests are disabled. To enable concurrent requests, set `num_concurrent` > 1."
@@ -419,9 +420,9 @@ class TemplateAPI(TemplateLM):
        for chunk in chunks:
            for cache_key, context_enc, continuation_enc in chunk:
                # max_length - 1 as we always have 1 token for generation
-                inp = (context_enc + continuation_enc)[-(self.max_length - 1) :]
+                inp = (context_enc + continuation_enc)[-(self.max_length) :]
                ctxlen = len(context_enc) - max(
-                    0, len(context_enc) + len(continuation_enc) - (self.max_length - 1)
+                    0, len(context_enc) + len(continuation_enc) - (self.max_length)
                )

                inputs.append(inp)