import json
import unittest

import sglang as sgl
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST


class TestSRTBackend(unittest.TestCase):
    """Smoke tests for ``sgl.Engine`` and ``sgl.Runtime`` text generation."""

    def test_engine_runtime_consistency(self):
        """Check that ``Engine`` and ``Runtime`` return the same text.

        Both backends are built from the same model path and random seed and
        are given the same prompt with temperature 0, so their generated text
        is expected to match exactly.
        """
        prompt = "Today is a sunny day and I like"
        model_path = DEFAULT_MODEL_NAME_FOR_TEST
        sampling_params = {"temperature": 0, "max_new_tokens": 8}

        engine = sgl.Engine(model_path=model_path, random_seed=42)
        try:
            out1 = engine.generate(prompt, sampling_params)["text"]
        finally:
            # Always shut the engine down, even when generation fails, so a
            # failing test does not leave the backend running.
            engine.shutdown()

        runtime = sgl.Runtime(model_path=model_path, random_seed=42)
        try:
            # Runtime.generate returns a JSON string, unlike Engine.generate,
            # whose result is indexed directly above.
            out2 = json.loads(runtime.generate(prompt, sampling_params))["text"]
        finally:
            runtime.shutdown()

        print("==== Answer 1 ====")
        print(out1)

        print("==== Answer 2 ====")
        print(out2)

        # assertEqual instead of a bare ``assert`` so the check still runs
        # when Python is started with -O.
        self.assertEqual(out1, out2, f"{out1} != {out2}")

    def test_engine_multiple_generate(self):
        """Ensure there is no issue running multiple generate calls."""
        prompt = "Today is a sunny day and I like"
        model_path = DEFAULT_MODEL_NAME_FOR_TEST
        sampling_params = {"temperature": 0, "max_new_tokens": 8}

        engine = sgl.Engine(model_path=model_path, random_seed=42)
        try:
            engine.generate(prompt, sampling_params)
            engine.generate(prompt, sampling_params)
        finally:
            # Release the engine even if one of the calls raises.
            engine.shutdown()


if __name__ == "__main__":
    unittest.main()