Unverified commit 54fb1c80, authored by Lianmin Zheng, committed by GitHub

Clean up unit tests (#1020)

parent b68c4c07
@@ -37,12 +37,12 @@ jobs:
         pip install accelerate
         pip install sentence_transformers
-    - name: Test Frontend Language
+    - name: Test Backend Runtime
       run: |
-        cd test/lang
+        cd test/srt
         python3 run_suite.py --suite minimal
-    - name: Test Backend Runtime
+    - name: Test Frontend Language
       run: |
-        cd test/srt
+        cd test/lang
         python3 run_suite.py --suite minimal
@@ -167,17 +167,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 - If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
 - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
 - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
-
-### Use Models From ModelScope
-To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE.
-```
-export SGLANG_USE_MODELSCOPE=true
-```
-Launch the [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) server:
-```
-SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
-```
 ### Supported Models
 - Llama / Llama 2 / Llama 3 / Llama 3.1

@@ -203,7 +193,17 @@ SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
 Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).

-### Run Llama 3.1 405B
+#### Use Models From ModelScope
+To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE.
+```
+export SGLANG_USE_MODELSCOPE=true
+```
+Launch the [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) server:
+```
+SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
+```
+
+#### Run Llama 3.1 405B
 ```bash
 ## Run 405B (fp8) on a single node
...
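As an aside on the moved ModelScope section: a minimal Python sketch of how such an environment-variable toggle is typically consumed. The `use_modelscope` helper below is a hypothetical illustration, not sglang's actual implementation.

```python
import os

def use_modelscope() -> bool:
    # Hypothetical helper, not sglang's actual code: read the
    # SGLANG_USE_MODELSCOPE environment variable as a boolean flag,
    # defaulting to False when it is unset.
    return os.environ.get("SGLANG_USE_MODELSCOPE", "false").lower() in ("1", "true")

if use_modelscope():
    # A loader would fetch weights from ModelScope here instead of
    # the Hugging Face Hub.
    print("Using ModelScope for model downloads")
```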
@@ -6,6 +6,9 @@ Use these commands to format your code and pass CI linting tests.
 ```
 pip3 install pre-commit
 cd sglang
-pre-commit install .
+pre-commit install
 pre-commit run --all-files
 ```
+
+## Add Unit Tests
+Add unit tests under [sglang/test](../../test). You can learn how to add and run tests from the README.md in that folder.
@@ -461,8 +461,11 @@ class ModelTpServer:
                 next_token_ids = next_token_ids.tolist()
             else:
                 if self.tokenizer is None:
-                    for i, req in enumerate(batch.reqs):
-                        next_token_ids.extend(req.sampling_params.stop_token_ids)
+                    next_token_ids = []
+                    for req in batch.reqs:
+                        next_token_ids.append(
+                            next(iter(req.sampling_params.stop_token_ids))
+                        )
                 else:
                     next_token_ids = [self.tokenizer.eos_token_id] * len(batch.reqs)
...
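The rewritten block appends exactly one stop token per request instead of extending the list with all of them. A standalone illustration of the `next(iter(...))` idiom it relies on:

```python
# Standalone illustration of the pattern used above: pull a single
# element out of a (possibly unordered) collection of stop token ids.
stop_token_ids = {151643, 151645}  # hypothetical example values

first_stop_id = next(iter(stop_token_ids))
print(first_stop_id in stop_token_ids)  # True

# For a set, the element chosen is arbitrary; if a deterministic choice
# matters, sort first: next(iter(sorted(stop_token_ids)))
```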
@@ -149,7 +149,7 @@ def test_decode_json():
     assert isinstance(js_obj["population"], int)

-def test_expert_answer():
+def test_expert_answer(check_answer=True):
     @sgl.function
     def expert_answer(s, question):
         s += "Question: " + question + "\n"

@@ -167,7 +167,9 @@ def test_expert_answer():
     )

     ret = expert_answer.run(question="What is the capital of France?", temperature=0.1)
-    assert "paris" in ret.text().lower()
+
+    if check_answer:
+        assert "paris" in ret.text().lower(), f"Answer: {ret.text()}"

 def test_tool_use():
...
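A brief usage sketch of the new `check_answer` flag, assuming a default backend has already been set via `set_default_backend`:

```python
from sglang.test.test_programs import test_expert_answer

# Deterministic backend already configured: keep the default strict check.
test_expert_answer()

# For a backend with less reliable sampling (e.g. the VertexAI tests in
# this commit), still exercise the program but skip the answer assertion.
test_expert_answer(check_answer=False)
```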
 # Run Unit Tests
-## Test Frontend Language
-```
-cd sglang/test/lang
-export OPENAI_API_KEY=sk-*****
+SGLang uses the built-in library [unittest](https://docs.python.org/3/library/unittest.html) as the testing framework.
+
+## Test Backend Runtime
+```bash
+cd sglang/test/srt

 # Run a single file
-python3 test_openai_backend.py
+python3 test_srt_endpoint.py

-# Run a suite
+# Run a single test
+python3 -m unittest test_srt_endpoint.TestSRTEndpoint.test_simple_decode
+
+# Run a suite with multiple files
 python3 run_suite.py --suite minimal
 ```

-## Test Backend Runtime
-```
-cd sglang/test/srt
+## Test Frontend Language
+```bash
+cd sglang/test/lang
+export OPENAI_API_KEY=sk-*****

 # Run a single file
-python3 test_eval_accuracy.py
+python3 test_openai_backend.py
+
+# Run a single test
+python3 -m unittest test_openai_backend.TestOpenAIBackend.test_few_shot_qa

-# Run a suite
+# Run a suite with multiple files
 python3 run_suite.py --suite minimal
 ```
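Since the README now standardizes on unittest, here is a minimal sketch of a test file in that style; the class, test name, and assertion are illustrative only, not an actual file from the suite:

```python
import unittest


class TestSimpleDecode(unittest.TestCase):
    """Illustrative test in the repository's unittest style."""

    @classmethod
    def setUpClass(cls):
        # Expensive shared setup (e.g. launching a server) goes here.
        cls.prompt = "The capital of France is"

    def test_simple_decode(self):
        # A real test would query the running server; this stand-in
        # only demonstrates the assertion style.
        completion = self.prompt + " Paris."
        self.assertIn("Paris", completion)


if __name__ == "__main__":
    unittest.main()
```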
@@ -21,11 +21,4 @@ class TestAnthropicBackend(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # from sglang.global_config import global_config
-    # global_config.verbosity = 2
-    # t = TestAnthropicBackend()
-    # t.setUpClass()
-    # t.test_mt_bench()
+    unittest.main()
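The commented-out manual-run snippets deleted in this and the following files existed to run one test during development. A hedged sketch of the standard unittest equivalent, using the test name from the removed comment:

```python
import unittest

# Run only TestAnthropicBackend.test_mt_bench instead of the whole file;
# equivalent to: python3 -m unittest test_anthropic_backend.TestAnthropicBackend.test_mt_bench
if __name__ == "__main__":
    unittest.main(defaultTest="TestAnthropicBackend.test_mt_bench")
```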
@@ -48,8 +48,4 @@ class TestBind(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestBind()
-    # t.setUpClass()
-    # t.test_cache()
+    unittest.main()
@@ -87,9 +87,4 @@ class TestChoices(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestChoices()
-    # t.test_token_length_normalized()
-    # t.test_greedy_token_selection()
-    # t.test_unconditional_likelihood_normalized()
+    unittest.main()
@@ -21,4 +21,4 @@ class TestAnthropicBackend(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
+    unittest.main()
@@ -88,11 +88,4 @@ class TestOpenAIBackend(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # from sglang.global_config import global_config
-    # global_config.verbosity = 2
-    # t = TestOpenAIBackend()
-    # t.setUpClass()
-    # t.test_stream()
+    unittest.main()
@@ -61,12 +61,4 @@ class TestSRTBackend(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # from sglang.global_config import global_config
-    # global_config.verbosity = 2
-    # t = TestSRTBackend()
-    # t.setUpClass()
-    # t.test_few_shot_qa()
-    # t.tearDownClass()
+    unittest.main()
@@ -125,7 +125,4 @@ class TestTracing(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestTracing()
-    # t.test_multi_function()
+    unittest.main()
@@ -14,26 +14,22 @@ from sglang.test.test_programs import (

 class TestVertexAIBackend(unittest.TestCase):
     backend = None
-    chat_backend = None
-    chat_vision_backend = None

     @classmethod
     def setUpClass(cls):
-        cls.backend = VertexAI("gemini-pro")
-        cls.chat_backend = VertexAI("gemini-pro")
-        cls.chat_vision_backend = VertexAI("gemini-pro-vision")
+        cls.backend = VertexAI("gemini-1.5-pro-001")

     def test_few_shot_qa(self):
         set_default_backend(self.backend)
         test_few_shot_qa()

     def test_mt_bench(self):
-        set_default_backend(self.chat_backend)
+        set_default_backend(self.backend)
         test_mt_bench()

     def test_expert_answer(self):
         set_default_backend(self.backend)
-        test_expert_answer()
+        test_expert_answer(check_answer=False)

     def test_parallel_decoding(self):
         set_default_backend(self.backend)

@@ -44,7 +40,7 @@ class TestVertexAIBackend(unittest.TestCase):
         test_parallel_encoding()

     def test_image_qa(self):
-        set_default_backend(self.chat_vision_backend)
+        set_default_backend(self.backend)
         test_image_qa()

     def test_stream(self):

@@ -53,11 +49,4 @@ class TestVertexAIBackend(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # from sglang.global_config import global_config
-    # global_config.verbosity = 2
-    # t = TestVertexAIBackend()
-    # t.setUpClass()
-    # t.test_stream()
+    unittest.main()
@@ -6,9 +6,9 @@ from sglang.test.test_utils import run_unittest_files

 suites = {
     "minimal": [
         "test_eval_accuracy.py",
-        "test_embedding_openai_server.py",
         "test_openai_server.py",
         "test_vision_openai_server.py",
+        "test_embedding_openai_server.py",
         "test_chunked_prefill.py",
         "test_torch_compile.py",
         "test_models_from_modelscope.py",
...
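For orientation, a rough sketch of how a suite file like this can dispatch to unittest. The argument parsing and loader calls below are illustrative assumptions; the real run_suite.py delegates to `run_unittest_files` from `sglang.test.test_utils`:

```python
import argparse
import unittest

# Hypothetical stand-in for run_suite.py's dispatch logic.
suites = {
    "minimal": [
        "test_eval_accuracy.py",
        "test_openai_server.py",
    ],
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--suite", default="minimal", choices=suites.keys())
    args = parser.parse_args()

    # Load each listed file as a test module and run them together.
    loader = unittest.TestLoader()
    tests = [loader.discover(".", pattern=f) for f in suites[args.suite]]
    unittest.TextTestRunner(verbosity=2).run(unittest.TestSuite(tests))
```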
@@ -37,9 +37,4 @@ class TestAccuracy(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestAccuracy()
-    # t.setUpClass()
-    # t.test_mmlu()
-    # t.tearDownClass()
+    unittest.main()
-import json
-import time
 import unittest

 import openai

 from sglang.srt.hf_transformers_utils import get_tokenizer
-from sglang.srt.openai_api.protocol import EmbeddingObject
 from sglang.srt.utils import kill_child_process
 from sglang.test.test_utils import popen_launch_server
@@ -65,12 +62,12 @@ class TestOpenAIServer(unittest.TestCase):
         ), f"{response.usage.total_tokens} vs {num_prompt_tokens}"

     def run_batch(self):
-        # FIXME not implemented
+        # FIXME: not implemented
         pass

     def test_embedding(self):
-        # TODO the fields of encoding_format, dimensions, user are skipped
-        # TODO support use_list_input
+        # TODO: the fields of encoding_format, dimensions, user are skipped
+        # TODO: support use_list_input
         for use_list_input in [False, True]:
             for token_input in [False, True]:
                 self.run_embedding(use_list_input, token_input)
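The nested loops above run one embedding check per parameter combination. An alternative sketch using unittest's `subTest`, which labels each combination in the failure report; the `run_embedding` stub stands in for the real helper in this file:

```python
import itertools
import unittest


class TestEmbeddingParams(unittest.TestCase):
    def run_embedding(self, use_list_input, token_input):
        # Stand-in for the real helper, which calls the embedding endpoint.
        self.assertIsInstance(use_list_input, bool)
        self.assertIsInstance(token_input, bool)

    def test_embedding(self):
        # subTest reports each (use_list_input, token_input) pair
        # individually instead of stopping at the first failing combo.
        for use_list_input, token_input in itertools.product([False, True], repeat=2):
            with self.subTest(use_list_input=use_list_input, token_input=token_input):
                self.run_embedding(use_list_input, token_input)


if __name__ == "__main__":
    unittest.main()
```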
@@ -80,9 +77,4 @@ class TestOpenAIServer(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestOpenAIServer()
-    # t.setUpClass()
-    # t.test_embedding()
-    # t.tearDownClass()
+    unittest.main()
@@ -32,9 +32,4 @@ class TestAccuracy(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestAccuracy()
-    # t.setUpClass()
-    # t.test_mmlu()
-    # t.tearDownClass()
+    unittest.main()
@@ -44,4 +44,4 @@ class TestDownloadFromModelScope(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
+    unittest.main()
@@ -399,9 +399,4 @@ class TestOpenAIServer(unittest.TestCase):

 if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-    # t = TestOpenAIServer()
-    # t.setUpClass()
-    # t.test_completion()
-    # t.tearDownClass()
+    unittest.main()