Unverified commit bc811365, authored by Cyrus Leung and committed by GitHub

Update vLLM compatibility (#3024)



* Update vLLM compatibility
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

* add TokensPrompt to all generate calls

---------
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Baber <baber@hey.com>
parent 4f8195f1
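
Newer vLLM versions expect pre-tokenized prompts to be wrapped in TokensPrompt rather than passed through the prompt_token_ids= keyword of LLM.generate; the hunks below make that change at every generate call site. The following is a minimal sketch of the new calling convention, not code from this repository; the model name and token IDs are illustrative placeholders.

# Minimal sketch of the TokensPrompt calling convention adopted below.
# The model name and the token IDs are illustrative placeholders.
from vllm import LLM, SamplingParams, TokensPrompt

llm = LLM(model="facebook/opt-125m")
sampling_params = SamplingParams(temperature=0.0, max_tokens=16)

requests = [[1, 2, 3, 4], [5, 6, 7]]  # pre-tokenized prompts (lists of token IDs)

# Old style this commit replaces: llm.generate(prompt_token_ids=requests, ...)
outputs = llm.generate(
    [TokensPrompt(prompt_token_ids=request) for request in requests],
    sampling_params=sampling_params,
)
for output in outputs:
    print(output.outputs[0].text)
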
 import copy
 import gc
-import inspect
 import logging
 import os
 from importlib.metadata import version
@@ -33,7 +32,7 @@ from lm_eval.utils import (
 try:
     import ray
-    from vllm import LLM, SamplingParams
+    from vllm import LLM, SamplingParams, TokensPrompt
     from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer import get_tokenizer
     from vllm.utils import get_open_port
@@ -79,7 +78,7 @@ def _vllm_mp_worker(
     try:
         llm = LLM(**model_args)
         res = llm.generate(
-            prompt_token_ids=requests,
+            [TokensPrompt(prompt_token_ids=request) for request in requests],
             sampling_params=sampling_params,
             lora_request=lora_request,
         )
@@ -239,13 +238,6 @@ class VLLM(TemplateLM):
             model_config = engine_args.create_model_config()
             kwargs_resolve_hf_chat_template["model_config"] = model_config
-            # https://github.com/vllm-project/vllm/pull/18259
-            if (
-                "trsut_remote_code"
-                in inspect.signature(resolve_hf_chat_template).parameters
-            ):
-                kwargs_resolve_hf_chat_template["trsut_remote_code"] = trust_remote_code
         else:
             kwargs_resolve_hf_chat_template["trust_remote_code"] = trust_remote_code
@@ -395,7 +387,7 @@
         ):
             llm = LLM(**model_args)
             return llm.generate(
-                prompt_token_ids=requests,
+                [TokensPrompt(prompt_token_ids=request) for request in requests],
                 sampling_params=sampling_params,
                 lora_request=lora_request,
             )
@@ -484,7 +476,7 @@
         else:
            outputs = self.model.generate(
-                prompt_token_ids=requests,
+                [TokensPrompt(prompt_token_ids=request) for request in requests],
                 sampling_params=sampling_params,
                 use_tqdm=True if self.batch_size == "auto" else False,
                 lora_request=self.lora_request,
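
On the removed inspect probe above: per the comment dropped in that hunk, an older vLLM build exposed the chat-template helper's trust-remote-code flag under the misspelled keyword trsut_remote_code (see vllm-project/vllm#18259), so the harness inspected the function signature at runtime and passed whichever spelling existed. The vLLM versions this commit targets accept the correctly spelled keyword, so the probe and the import inspect go away. Below is a minimal, generic sketch of the removed probing pattern; configure_trust_kwarg and some_helper are hypothetical names for illustration, not part of vLLM or this repository.

import inspect

def configure_trust_kwarg(some_helper, trust_remote_code: bool) -> dict:
    # Probe the callable's signature and use whichever keyword spelling it accepts.
    # `some_helper` stands in for the version-dependent vLLM helper; these names
    # are illustrative, not the actual harness code.
    kwargs = {}
    if "trsut_remote_code" in inspect.signature(some_helper).parameters:
        kwargs["trsut_remote_code"] = trust_remote_code  # old, misspelled keyword
    else:
        kwargs["trust_remote_code"] = trust_remote_code  # corrected keyword
    return kwargs
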