[ci]try to fix flaky multi-step tests (#11894)

Signed-off-by: youkaichao <youkaichao@gmail.com>

[ci]try to fix flaky multi-step tests (#11894)
Signed-off-by: youkaichao <youkaichao@gmail.com>
bd828722 · youkaichao · GitHub · 405eb8e3 · bd828722 · bd828722
Unverified Commit bd828722 authored Jan 09, 2025 by youkaichao Committed by GitHub Jan 09, 2025
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 4 deletions

tests/multi_step/test_correctness_async_llm.py tests/multi_step/test_correctness_async_llm.py +1 -2

tests/utils.py tests/utils.py +7 -2

No files found.
--- a/tests/multi_step/test_correctness_async_llm.py
+++ b/tests/multi_step/test_correctness_async_llm.py
@@ -16,7 +16,6 @@ NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]
 DEFAULT_SERVER_ARGS: List[str] = [
-    "--disable-log-requests",
    "--worker-use-ray",
    "--gpu-memory-utilization",
    "0.85",
@@ -110,7 +109,7 @@ async def test_multi_step(
    # Spin up client/server & issue completion API requests.
    # Default `max_wait_seconds` is 240 but was empirically
-    # was raised 3x to 720 *just for this test* due to
+    # raised 5x to 1200 *just for this test* due to
    # observed timeouts in GHA CI
    ref_completions = await completions_with_server_args(
        prompts,

--- a/tests/utils.py
+++ b/tests/utils.py
@@ -157,13 +157,19 @@ class RemoteOpenAIServer:
    def url_for(self, *parts: str) -> str:
        return self.url_root + "/" + "/".join(parts)
-    def get_client(self):
+    def get_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
        return openai.OpenAI(
            base_url=self.url_for("v1"),
            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
        )
    def get_async_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
        return openai.AsyncOpenAI(base_url=self.url_for("v1"),
                                  api_key=self.DUMMY_API_KEY,
                                  max_retries=0,
@@ -780,7 +786,6 @@ async def completions_with_server_args(
    assert len(max_tokens) == len(prompts)
    outputs = None
-    max_wait_seconds = 240 * 3  # 240 is default
    with RemoteOpenAIServer(model_name,
                            server_cli_args,
                            max_wait_seconds=max_wait_seconds) as server: