Unverified Commit 995af5a5 authored by Ying Sheng's avatar Ying Sheng Committed by GitHub
Browse files

Improve the structure of CI (#911)

parent 53985645
docker run --name tgi --rm -ti --gpus all --network host \
-v /home/ubuntu/model_weights/Llama-2-7b-chat-hf:/Llama-2-7b-chat-hf \
ghcr.io/huggingface/text-generation-inference:1.3.0 \
--model-id /Llama-2-7b-chat-hf --num-shard 1 --trust-remote-code \
--max-input-length 2048 --max-total-tokens 4096 \
--port 24000
# Run Unit Tests
## Test Frontend Language
```
cd sglang/test/lang
export OPENAI_API_KEY=sk-*****
# Run a single file
python3 test_openai_backend.py
# Run a suite
python3 run_suite.py --suite minimal
```
## Test Backend Runtime
```
cd sglang/test/srt
# Run a single file
python3 test_eval_accuracy.py
# Run a suite
python3 run_suite.py --suite minimal
```
import argparse
import glob
import multiprocessing
import os
import time
import unittest
from sglang.utils import run_with_timeout
from sglang.test.test_utils import run_unittest_files
# Maps a suite name to the ordered list of unit-test files it runs.
# NOTE: keys must be unique — a duplicate key would silently overwrite the
# earlier entry (the previous revision listed "minimal" twice).
suites = {
    "minimal": ["test_srt_backend.py", "test_openai_backend.py"],
}
def run_unittest_files(files, args):
    """Run each unittest file in its own subprocess with a per-file time limit.

    Args:
        files: Filenames of unittest modules to run, in order.
        args: Parsed CLI arguments; only ``args.time_limit_per_file``
            (seconds) is read.

    Returns:
        True if every file passed within its time limit; False as soon as one
        file fails or times out (remaining files are not run).
    """
    for filename in files:

        def run_target():
            # unittest.main calls sys.exit, so the child's exit code
            # reflects the test result; no return value is needed.
            print(filename)
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        proc = multiprocessing.Process(target=run_target)

        def run_one_file():
            proc.start()
            proc.join()

        try:
            run_with_timeout(run_one_file, timeout=args.time_limit_per_file)
            if proc.exitcode != 0:
                return False
        except TimeoutError:
            proc.terminate()
            # Give the terminated child a moment to release its resources
            # (e.g. GPU memory, ports) before reporting.
            time.sleep(5)
            print(
                f"\nTimeout after {args.time_limit_per_file} seconds "
                f"when running {filename}"
            )
            return False

    return True
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--time-limit-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    args = arg_parser.parse_args()

    # "all" discovers every test_*.py recursively; otherwise run the
    # curated list for the selected suite.
    if args.suite == "all":
        files = glob.glob("**/test_*.py", recursive=True)
    else:
        files = suites[args.suite]

    tic = time.time()
    success = run_unittest_files(files, args)
    if success:
        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
    else:
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")
    exit(0 if success else -1)
......@@ -18,6 +18,7 @@ import torch
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
# Models exercised by the causal-model tests.
MODELS = [
    # (model_name, tp_size)
    ("meta-llama/Meta-Llama-3.1-8B-Instruct", 1),
    # Tensor-parallel variant disabled — presumably needs 2 GPUs in CI; confirm before enabling.
    # ("meta-llama/Meta-Llama-3.1-8B-Instruct", 2),
]
......
import argparse
import glob
from sglang.test.test_utils import run_unittest_files
# Backend-runtime test suites: maps a suite name to the ordered list of
# unit-test files run_unittest_files will execute.
suites = {
    "minimal": [
        "test_openai_server.py",
        "test_eval_accuracy.py",
        "test_chunked_prefill.py",
        "test_torch_compile.py",
        "models/test_causal_models.py",
    ],
}
if __name__ == "__main__":
    # CLI entry point: choose a suite (or "all") and a per-file timeout,
    # then run the selected test files and exit with the aggregate code.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    suite_names = list(suites.keys())
    parser.add_argument(
        "--suite",
        type=str,
        default=suite_names[0],
        choices=suite_names + ["all"],
        help="The suite to run",
    )
    args = parser.parse_args()

    # "all" discovers every test_*.py recursively; a named suite uses its
    # curated file list.
    files = (
        glob.glob("**/test_*.py", recursive=True)
        if args.suite == "all"
        else suites[args.suite]
    )

    exit_code = run_unittest_files(files, args.timeout_per_file)
    exit(exit_code)
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server
class TestAccuracy(unittest.TestCase):
    """Launch an SRT server with a small chunked-prefill size and check MMLU accuracy."""

    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        # A tiny chunk size forces chunked prefill to actually kick in.
        server_args = ["--chunked-prefill-size", "32"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=server_args,
        )

    @classmethod
    def tearDownClass(cls):
        # Tear down the server process tree started in setUpClass.
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        eval_args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )
        metrics = run_eval(eval_args)
        # Sanity threshold: chunked prefill must not degrade accuracy.
        assert metrics["score"] >= 0.5
if __name__ == "__main__":
    # Suppress ResourceWarning noise from the launched server's sockets.
    unittest.main(warnings="ignore")
import json
import unittest
from types import SimpleNamespace
......@@ -11,11 +10,9 @@ class TestAccuracy(unittest.TestCase):
@classmethod
def setUpClass(cls):
port = 30000
cls.model = MODEL_NAME_FOR_TEST
cls.base_url = f"http://localhost:{port}"
cls.process = popen_launch_server(cls.model, port, timeout=300)
cls.base_url = f"http://localhost:30000"
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
@classmethod
def tearDownClass(cls):
......
......@@ -11,11 +11,10 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod
def setUpClass(cls):
port = 30000
cls.model = MODEL_NAME_FOR_TEST
cls.base_url = f"http://localhost:{port}/v1"
cls.process = popen_launch_server(cls.model, port, timeout=300)
cls.base_url = f"http://localhost:30000"
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
......
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server
class TestAccuracy(unittest.TestCase):
    """Launch an SRT server with torch.compile enabled and check MMLU accuracy."""

    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        extra_args = ["--enable-torch-compile"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=extra_args,
        )

    @classmethod
    def tearDownClass(cls):
        # Tear down the server process tree started in setUpClass.
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        eval_args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )
        metrics = run_eval(eval_args)
        # Sanity threshold: torch.compile must not degrade accuracy.
        assert metrics["score"] >= 0.5
if __name__ == "__main__":
    # Suppress ResourceWarning noise from the launched server's sockets.
    unittest.main(warnings="ignore")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.