Unverified Commit 995af5a5 authored by Ying Sheng's avatar Ying Sheng Committed by GitHub
Browse files

Improve the structure of CI (#911)

parent 53985645
docker run --name tgi --rm -ti --gpus all --network host \
-v /home/ubuntu/model_weights/Llama-2-7b-chat-hf:/Llama-2-7b-chat-hf \
ghcr.io/huggingface/text-generation-inference:1.3.0 \
--model-id /Llama-2-7b-chat-hf --num-shard 1 --trust-remote-code \
--max-input-length 2048 --max-total-tokens 4096 \
--port 24000
# Run Unit Tests
## Test Frontend Language
```
cd sglang/test/lang
export OPENAI_API_KEY=sk-*****
# Run a single file
python3 test_openai_backend.py
# Run a suite
python3 run_suite.py --suite minimal
```
## Test Backend Runtime
```
cd sglang/test/srt
# Run a single file
python3 test_eval_accuracy.py
# Run a suite
python3 run_suite.py --suite minimal
```
import argparse
import glob
import multiprocessing
import os
import time
import unittest
from sglang.utils import run_with_timeout
from sglang.test.test_utils import run_unittest_files
# Maps a suite name to the ordered list of unit-test files it runs.
# NOTE: keys must be unique — a duplicate key would silently overwrite the
# earlier entry (the previous revision listed "minimal" twice).
suites = {
    "minimal": ["test_srt_backend.py", "test_openai_backend.py"],
}
def run_unittest_files(files, args):
    """Run each unittest file in its own subprocess with a per-file time limit.

    Args:
        files: Filenames of unittest modules to run, in order.
        args: Parsed CLI arguments; only ``args.time_limit_per_file``
            (seconds) is read.

    Returns:
        True if every file passed within its time limit; False as soon as one
        file fails or times out (remaining files are not run).
    """
    for filename in files:

        def run_target():
            # unittest.main calls sys.exit, so the child's exit code
            # reflects the test result; no return value is needed.
            print(filename)
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        proc = multiprocessing.Process(target=run_target)

        def run_one_file():
            proc.start()
            proc.join()

        try:
            run_with_timeout(run_one_file, timeout=args.time_limit_per_file)
            if proc.exitcode != 0:
                return False
        except TimeoutError:
            proc.terminate()
            # Give the terminated child a moment to release its resources
            # (e.g. GPU memory, ports) before reporting.
            time.sleep(5)
            print(
                f"\nTimeout after {args.time_limit_per_file} seconds "
                f"when running {filename}"
            )
            return False

    return True
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--time-limit-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    args = arg_parser.parse_args()

    # "all" discovers every test_*.py recursively; otherwise run the
    # curated list for the selected suite.
    if args.suite == "all":
        files = glob.glob("**/test_*.py", recursive=True)
    else:
        files = suites[args.suite]

    tic = time.time()
    success = run_unittest_files(files, args)
    if success:
        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
    else:
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")
    exit(0 if success else -1)
......@@ -18,6 +18,7 @@ import torch
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
# Models exercised by the causal-model tests.
MODELS = [
    # (model_name, tp_size)
    ("meta-llama/Meta-Llama-3.1-8B-Instruct", 1),
    # Tensor-parallel variant disabled — presumably needs 2 GPUs in CI; confirm before enabling.
    # ("meta-llama/Meta-Llama-3.1-8B-Instruct", 2),
]
......
import argparse
import glob
from sglang.test.test_utils import run_unittest_files
# Backend-runtime test suites: maps a suite name to the ordered list of
# unit-test files run_unittest_files will execute.
suites = {
    "minimal": [
        "test_openai_server.py",
        "test_eval_accuracy.py",
        "test_chunked_prefill.py",
        "test_torch_compile.py",
        "models/test_causal_models.py",
    ],
}
if __name__ == "__main__":
    # CLI entry point: choose a suite (or "all") and a per-file timeout,
    # then run the selected test files and exit with the aggregate code.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    suite_names = list(suites.keys())
    parser.add_argument(
        "--suite",
        type=str,
        default=suite_names[0],
        choices=suite_names + ["all"],
        help="The suite to run",
    )
    args = parser.parse_args()

    # "all" discovers every test_*.py recursively; a named suite uses its
    # curated file list.
    files = (
        glob.glob("**/test_*.py", recursive=True)
        if args.suite == "all"
        else suites[args.suite]
    )

    exit_code = run_unittest_files(files, args.timeout_per_file)
    exit(exit_code)
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server
class TestAccuracy(unittest.TestCase):
    """Launch an SRT server with a small chunked-prefill size and check MMLU accuracy."""

    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        # A tiny chunk size forces chunked prefill to actually kick in.
        server_args = ["--chunked-prefill-size", "32"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=server_args,
        )

    @classmethod
    def tearDownClass(cls):
        # Tear down the server process tree started in setUpClass.
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        eval_args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )
        metrics = run_eval(eval_args)
        # Sanity threshold: chunked prefill must not degrade accuracy.
        assert metrics["score"] >= 0.5
if __name__ == "__main__":
    # Suppress ResourceWarning noise from the launched server's sockets.
    unittest.main(warnings="ignore")
import json
import unittest
from types import SimpleNamespace
......@@ -11,11 +10,9 @@ class TestAccuracy(unittest.TestCase):
@classmethod
def setUpClass(cls):
port = 30000
cls.model = MODEL_NAME_FOR_TEST
cls.base_url = f"http://localhost:{port}"
cls.process = popen_launch_server(cls.model, port, timeout=300)
cls.base_url = f"http://localhost:30000"
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
@classmethod
def tearDownClass(cls):
......
......@@ -11,11 +11,10 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod
def setUpClass(cls):
port = 30000
cls.model = MODEL_NAME_FOR_TEST
cls.base_url = f"http://localhost:{port}/v1"
cls.process = popen_launch_server(cls.model, port, timeout=300)
cls.base_url = f"http://localhost:30000"
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
......
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server
class TestAccuracy(unittest.TestCase):
    """Launch an SRT server with torch.compile enabled and check MMLU accuracy."""

    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        extra_args = ["--enable-torch-compile"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=extra_args,
        )

    @classmethod
    def tearDownClass(cls):
        # Tear down the server process tree started in setUpClass.
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        eval_args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )
        metrics = run_eval(eval_args)
        # Sanity threshold: torch.compile must not degrade accuracy.
        assert metrics["score"] >= 0.5
if __name__ == "__main__":
    # Suppress ResourceWarning noise from the launched server's sockets.
    unittest.main(warnings="ignore")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.