single GPU automatic batching logic

d5720d5f · Benjamin Fattori · 2e522e2c · d5720d5f · d5720d5f · d5720d5f
Commit d5720d5f authored Mar 09, 2023 by Benjamin Fattori
Showing with 36 additions and 13 deletions

lm_eval/base.py lm_eval/base.py +21 -2

lm_eval/models/gpt2.py lm_eval/models/gpt2.py +7 -8

lm_eval/models/huggingface.py lm_eval/models/huggingface.py +7 -2

main.py main.py +1 -1

No files found.
--- a/lm_eval/base.py
+++ b/lm_eval/base.py
@@ -11,6 +11,8 @@ from sqlitedict import SqliteDict
 from tqdm import tqdm
 import torch
 import torch.nn.functional as F
+from accelerate import find_executable_batch_size
+

 from lm_eval.metrics import mean, weighted_perplexity, weighted_mean, bits_per_byte
 from lm_eval import utils
@@ -233,10 +235,27 @@ class BaseLM(LM):
            toks = x[1] + x[2]
            return -len(toks), tuple(toks)

-        # TODO: automatic (variable) batch size detection for vectorization
+        
        re_ord = utils.Reorderer(requests, _collate)
+
+        # automatic (variable) batch size detection for vectorization
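+        # take the first reordered request (the longest, since _collate sorts by descending token count) and size the probe batch by its context + continuation length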
+        _, context_enc, continuation_enc = re_ord.get_reordered()[0] 
+        max_context = len(context_enc) + len(continuation_enc)
+        if self.batch_size == 'auto':
+            print('Passed argument batch_size = auto. Detecting largest batch size')
+            @find_executable_batch_size(starting_batch_size=512) # if OOM, then halves batch_size and tries again
+            def forward_batch(batch_size):
+                test_batch = torch.ones((batch_size, max_context), device=self.device).long()
+                self._model_call(test_batch) 
+                return batch_size
+            
+            batch_size = forward_batch() 
+            print(f"Determined Largest batch size: {batch_size}")
+            adaptive_batch_size = batch_size
+
        for chunk in utils.chunks(
-            tqdm(re_ord.get_reordered(), disable=disable_tqdm), self.batch_size
+            tqdm(re_ord.get_reordered(), disable=disable_tqdm), self.batch_size if self.batch_size != "auto" else adaptive_batch_size
        ):
            inps = []
            cont_toks_list = []

--- a/lm_eval/models/gpt2.py
+++ b/lm_eval/models/gpt2.py
 import torch
 import transformers
 from lm_eval.base import BaseLM
-
+from accelerate import find_executable_batch_size

 class HFLM(BaseLM):
    def __init__(
@@ -18,7 +18,7 @@ class HFLM(BaseLM):

        assert isinstance(device, str)
        assert isinstance(pretrained, str)
-        assert isinstance(batch_size, int)
+        assert isinstance(batch_size, (int,str))

        if device:
            if device not in ["cuda", "cpu"]:
@@ -69,13 +69,12 @@ class HFLM(BaseLM):
                31373,
            ], self.tokenizer.encode("hello\n\nhello")

-        # multithreading and batching
-        self.batch_size_per_gpu = batch_size  # todo: adaptive batch size
+        # keep the 'auto' sentinel unchanged; otherwise coerce batch_size to int (it may arrive as a string from the CLI)
+        if batch_size == 'auto': 
+            self.batch_size_per_gpu = batch_size
+        else:
+            self.batch_size_per_gpu = int(batch_size) 

-        # TODO: fix multi-gpu
-        # gpus = torch.cuda.device_count()
-        # if gpus > 1:
-        #     self.gpt2 = nn.DataParallel(self.gpt2)

    @property
    def eot_token_id(self):

--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -129,7 +129,7 @@ class HuggingFaceAutoLM(BaseLM):

        assert isinstance(pretrained, str)
        assert isinstance(device, str)
-        assert isinstance(batch_size, int)
+        assert isinstance(batch_size, (int, str))
        if (
            add_special_tokens is not None
            and self.AUTO_MODEL_CLASS is transformers.AutoModelForCausalLM
@@ -143,7 +143,12 @@ class HuggingFaceAutoLM(BaseLM):
                not add_special_tokens
            ), "Evaluating causal models with `add_special_tokens=True` is currently not supported."

-        self._batch_size = batch_size  # TODO: Adaptive batch size
+        # keep the 'auto' sentinel unchanged; otherwise coerce batch_size to int (it may arrive as a string from the CLI)
+        if batch_size == 'auto': 
+            self._batch_size = batch_size
+        else:
+            self._batch_size = int(batch_size) 
+
        self._max_gen_toks = max_gen_toks
        self._max_length = max_length
        self._config = self.AUTO_CONFIG_CLASS.from_pretrained(

--- a/main.py
+++ b/main.py
@@ -32,7 +32,7 @@ def parse_args():
    parser.add_argument("--tasks", default=None, choices=MultiChoice(tasks.ALL_TASKS))
    parser.add_argument("--provide_description", action="store_true")
    parser.add_argument("--num_fewshot", type=int, default=0)
-    parser.add_argument("--batch_size", type=int, default=None)
+    parser.add_argument("--batch_size", type=str, default=None)
    parser.add_argument("--device", type=str, default=None)
    parser.add_argument("--output_path", default=None)
    parser.add_argument("--limit", type=int, default=None)