avoid timeout errors with high concurrency in api_model (#2307)

* avoid timeout errors with high concurrency in api_model * style * add timeout * add docs --------- Co-authored-by: Baber <baber@hey.com>

avoid timeout errors with high concurrency in api_model (#2307)
* avoid timeout errors with high concurrency in api_model * style * add timeout * add docs --------- Co-authored-by: Baber <baber@hey.com>
9632b343 · Trawinski, Dariusz · GitHub · f49b0377 · 9632b343 · 9632b343
Unverified Commit 9632b343 authored Dec 03, 2024 by Trawinski, Dariusz Committed by GitHub Dec 04, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 2 deletions

docs/API_guide.md docs/API_guide.md +4 -0

lm_eval/models/api_models.py lm_eval/models/api_models.py +7 -2

No files found.
--- a/docs/API_guide.md
+++ b/docs/API_guide.md
@@ -50,6 +50,10 @@ When initializing a `TemplateAPI` instance or a subclass, you can provide severa
   - Useful for APIs that support parallel processing.
   - Default is 1 (sequential processing).
+- `timeout` (int, optional):
+   - Timeout for API requests in seconds.
+   - Default is 300.
 - `tokenized_requests` (bool):
  - Determines whether the input is pre-tokenized. Defaults to `True`.
  - Requests can be sent in either tokenized form (`list[list[int]]`) or as text (`list[str]`, or `str` for batch_size=1).

--- a/lm_eval/models/api_models.py
+++ b/lm_eval/models/api_models.py
@@ -21,7 +21,7 @@ from typing import (
 try:
    import requests
-    from aiohttp import ClientSession, TCPConnector
+    from aiohttp import ClientSession, ClientTimeout, TCPConnector
    from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
    from tqdm import tqdm
    from tqdm.asyncio import tqdm_asyncio
@@ -81,6 +81,8 @@ class TemplateAPI(TemplateLM):
        use_fast_tokenizer: bool = True,
        verify_certificate: bool = True,
        eos_string: str = None,
+        # timeout in seconds
+        timeout: int = 300,
        **kwargs,
    ) -> None:
        super().__init__()
@@ -126,6 +128,7 @@ class TemplateAPI(TemplateLM):
        self.max_retries = int(max_retries)
        self.verify_certificate = verify_certificate
        self._eos_string = eos_string
+        self.timeout = int(timeout)
        eval_logger.info(f"Using tokenizer {self.tokenizer_backend}")
        if self.tokenizer_backend is None:
@@ -466,7 +469,9 @@ class TemplateAPI(TemplateLM):
    ) -> Union[List[List[str]], List[List[Tuple[float, bool]]]]:
        ctxlens = ctxlens if ctxlens else [None] * len(requests)
        conn = TCPConnector(limit=self._concurrent)
-        async with ClientSession(connector=conn) as session:
+        async with ClientSession(
+            connector=conn, timeout=ClientTimeout(total=self.timeout)
+        ) as session:
            retry_: Callable[..., Awaitable[Any]] = retry(
                stop=stop_after_attempt(self.max_retries),
                wait=wait_exponential(multiplier=0.5, min=1, max=10),