Commit 37f10cad authored by baberabb's avatar baberabb
Browse files

add auto batching

parent 2c20df08
from collections import defaultdict
from typing import List, Tuple, Optional, Literal
from typing import List, Tuple, Optional, Literal, Union
from lm_eval.api.instance import Instance
from lm_eval.api.model import LM
......@@ -27,7 +27,7 @@ class VLLM(LM):
quantization: Optional[Literal["awq"]] = None,
max_gen_toks: int = 256,
swap_space: int = 4,
batch_size: int = 1,
batch_size: Union[str, int] = 1,
max_batch_size=None,
max_length: int = None,
seed: int = 1234,
......@@ -206,7 +206,7 @@ class VLLM(LM):
for key, re_ord in re_ords.items():
chunks = utils.chunks(
re_ord.get_reordered(),
n=self.batch_size,
n=self.batch_size if self.batch_size != "auto" else 0,
fn=None,
)
for chunk in chunks:
......@@ -285,7 +285,7 @@ class VLLM(LM):
chunks = utils.chunks(
re_ord.get_reordered(),
n=self.batch_size,
n=self.batch_size if self.batch_size != "auto" else 0,
fn=None,
)
pbar = tqdm(total=len(requests), disable=disable_tqdm)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment