"docs/vscode:/vscode.git/clone" did not exist on "d95b993427234c094d204ef703b154da788df261"
Commit 09e6e2aa authored by zhaochenyang20

Merge branch 'main' of github.com:sgl-project/sglang

parents 35bdb485 fad29f7f
@@ -361,9 +361,13 @@ class BatchStrOut:
     output_ids: Optional[List[int]]
     # Token counts
+    # The real input and output tokens can be obtained from
+    # origin_input_ids and output_ids by enabling --return_token_ids.
+    # TODO (Shuai): Rename this to clarify the meaning.
     prompt_tokens: List[int]
     completion_tokens: List[int]
     cached_tokens: List[int]
     # Logprobs
     input_token_logprobs_val: List[float]
     input_token_logprobs_idx: List[int]
...
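The added comment means that, once --return_token_ids is enabled, callers receive the raw token ids alongside these counts. A minimal usage sketch (not part of this commit), based on the offline Engine API exercised in the test change below; the model path is simply the one used by the earlier version of that test:

import sglang as sgl

# Launch the offline engine with token-id reporting enabled.
llm = sgl.Engine(
    model_path="meta-llama/Meta-Llama-3.1-8B-Instruct", return_token_ids=True
)
outputs = llm.generate(["Hello, my name is"], {"temperature": 0, "top_p": 0.95})

out = outputs[0]
print(out["text"])                             # generated text
print(out["input_ids"])                        # prompt token ids (includes the start token)
print(out["output_ids"])                       # generated token ids
print(out["meta_info"]["prompt_tokens"])       # counts carried by BatchStrOut
print(out["meta_info"]["completion_tokens"])

llm.shutdown()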
@@ -3,16 +3,15 @@ import unittest
 from transformers import AutoTokenizer

 import sglang as sgl
+from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST


 class TestEngineTokenIds(unittest.TestCase):
     def test_token_ids_in_generate(self):
         llm = sgl.Engine(
-            model_path="meta-llama/Meta-Llama-3.1-8B-Instruct", return_token_ids=True
+            model_path=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, return_token_ids=True
         )
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Meta-Llama-3.1-8B-Instruct"
-        )
+        tokenizer = AutoTokenizer.from_pretrained(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
         prompts = [
             "Hello, my name is",
@@ -20,37 +19,21 @@ class TestEngineTokenIds(unittest.TestCase):
             "The capital of France is",
             "The future of AI is",
         ]
-        sampling_params = {"temperature": 0.8, "top_p": 0.95}
+        sampling_params = {"temperature": 0, "top_p": 0.95}
         outputs = llm.generate(prompts, sampling_params)

-        # Hugging Face tokenizer has a start token in its output,
-        # while SGLang only adds next_token_id in output_ids.
-        # We remove start token in HF output for comparison.
         for prompt, output in zip(prompts, outputs):
-            hf_input_ids = tokenizer.encode(prompt)
-            self.assertEqual(
-                output["input_ids"],
-                hf_input_ids,
-                f"Input token IDs mismatch for: {prompt}",
-            )
-
-            # SGLang's output_ids does not have a start token.
-            hf_output_ids = tokenizer.encode(output["text"])[1:]  # remove start token
-            self.assertEqual(
-                output["output_ids"],
-                hf_output_ids,
-                f"Output token IDs mismatch for: {output['text']}",
-            )
-
-            self.assertEqual(
-                len(output["input_ids"]),
-                output["meta_info"]["prompt_tokens"],
-                "Prompt token count mismatch",
-            )
-            self.assertEqual(
-                len(output["output_ids"]),
-                output["meta_info"]["completion_tokens"],
-                "Completion token count mismatch",
-            )
+            # SGLang's input_ids has a start token, so we remove it for comparison.
+            decoded_input = tokenizer.decode(output["input_ids"][1:])
+            assert (
+                decoded_input in prompt
+            ), f"Decoded input: {decoded_input} mismatch for: {prompt}"
+
+            decoded_output = tokenizer.decode(output["output_ids"])
+            assert (
+                decoded_output in output["text"]
+            ), f"Decoded output: {decoded_output} mismatch for: {output['text']}"

         llm.shutdown()
...
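Background for the rewritten assertions: the Hugging Face tokenizer prepends a start (BOS) token when encoding, while SGLang's output_ids contain only the generated next-token ids, so exact id-list comparisons against re-encoded text are brittle. Decoding SGLang's ids and checking substring containment sidesteps this. A rough illustration, assuming the Llama 3.1 tokenizer used by the earlier version of the test:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
ids = tok.encode("The capital of France is")

# encode() typically prepends the BOS/start token for this tokenizer,
# which is why the old test had to strip the first id before comparing.
print(ids[0] == tok.bos_token_id)   # expected: True
print(tok.decode(ids[1:]))          # "The capital of France is"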