Unverified commit 6e09cf6a, authored by Lianmin Zheng, committed by GitHub

Misc fixes (#432)

parent 72bb3443
```
pip install build twine
```
```
cd python
bash upload_pypi.sh
```
\ No newline at end of file
@@ -81,3 +81,9 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
 cd test/lang
 python3 run_all.py
 ```
+
+## OpenAI API server
+```
+cd test/srt
+python test_openai_server.py
+```
\ No newline at end of file
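The new section above points at `test/srt/test_openai_server.py`, which exercises the OpenAI-compatible endpoint. For a quick manual check while the server is up, something like the following can be used (a sketch, not part of the commit: it assumes the server from the launch command above listens on the usual default port 30000, an `openai` 1.x client is installed, and the loaded model is exposed under the placeholder name `default`):

```
# Hypothetical smoke test against a locally running sglang OpenAI-compatible server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

resp = client.chat.completions.create(
    model="default",  # placeholder; use the model name the server actually reports
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(resp.choices[0].message.content)
```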
 import transformers
 import code
-name = "meta-llama/Llama-2-7b-chat-hf"
+#name = "meta-llama/Llama-2-7b-chat-hf"
+name = "meta-llama/Meta-Llama-3-8B-Instruct"
 t = transformers.AutoTokenizer.from_pretrained(name)
 code.interact(local=locals())
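This playground script simply loads the tokenizer and drops into an interactive shell. A few things one might try in that session, assuming the standard `transformers` tokenizer API (illustrative only, not part of the commit):

```
# Inside the code.interact() session started above:
ids = t.encode("Hello, world!")   # token ids for a short string
print(ids)
print(t.decode(ids))              # round-trip back to text

# Instruct-tuned checkpoints such as Meta-Llama-3-8B-Instruct also ship a chat template:
print(t.apply_chat_template(
    [{"role": "user", "content": "Hi"}],
    tokenize=False,
    add_generation_prompt=True,
))
```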
@@ -426,7 +426,9 @@ class ModelRpcServer:
         # Only transfer the selected logprobs of the next token to CPU to reduce overhead.
         if last_logprobs is not None:
             last_token_logprobs = (
-                last_logprobs[torch.arange(len(batch.reqs)), next_token_ids].tolist()
+                last_logprobs[
+                    torch.arange(len(batch.reqs), device=next_token_ids.device),
+                    next_token_ids].tolist()
             )
         next_token_ids = next_token_ids.tolist()
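The fix above builds the `torch.arange` row index on the same device as `next_token_ids`: when `last_logprobs` lives on the GPU, mixing a default (CPU) arange with a CUDA index tensor in advanced indexing can raise a device-mismatch error or force an extra transfer. A minimal standalone illustration of the pattern (made-up shapes and names, not the server code):

```
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
last_logprobs = torch.randn(4, 32000, device=device)           # [batch, vocab]
next_token_ids = torch.randint(0, 32000, (4,), device=device)  # sampled token ids

# Keep the row index on the same device as the tensors being indexed so the
# advanced indexing below never mixes CPU and GPU index tensors.
rows = torch.arange(4, device=next_token_ids.device)
last_token_logprobs = last_logprobs[rows, next_token_ids].tolist()
print(last_token_logprobs)
```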
@@ -587,6 +589,7 @@ class ModelRpcServer:
                     - req.prompt_tokens,
                     "completion_tokens_wo_jump_forward": req.completion_tokens_wo_jump_forward,
                     "finish_reason": str(req.finish_reason),  # FIXME: convert to the correct string
+                    "hit_stop_str": req.hit_stop_str,
                 }
                 if req.return_logprob:
                     (
@@ -110,8 +110,8 @@ class InputMetadata:
         self.kv_last_page_len = torch.ones(
             (self.batch_size,), dtype=torch.int32, device="cuda"
         )
-        req_pool_indices_cpu = self.req_pool_indices.cpu().tolist()
-        seq_lens_cpu = self.seq_lens.tolist()
+        req_pool_indices_cpu = self.req_pool_indices.cpu().numpy()
+        seq_lens_cpu = self.seq_lens.cpu().numpy()
         self.kv_indices = torch.cat(
             [
                 self.req_to_token_pool.req_to_token[
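The switch from `.tolist()` to `.cpu().numpy()` above keeps the per-request lengths and pool indices as NumPy arrays instead of Python lists. A small sketch of the difference between the two forms (illustrative values, not the real metadata):

```
import torch

seq_lens = torch.tensor([5, 9, 3])

as_list = seq_lens.tolist()          # plain Python ints, one new object per element
as_numpy = seq_lens.cpu().numpy()    # NumPy view sharing the CPU tensor's buffer

# Both support element access, but the NumPy form keeps its integer dtype and
# avoids materializing a Python list when the batch is large.
print(as_list[1], as_numpy[1])
```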
@@ -163,7 +163,7 @@ def test_regex(args):
     regex = (
         r"""\{\n"""
         + r""" "name": "[\w]+",\n"""
-        + r""" "population": [\w\d\s]+\n"""
+        + r""" "population": [\d]+\n"""
         + r"""\}"""
     )
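The tightened pattern now only admits digits in the `population` field. A quick offline check of the new regex (the exact indentation inside the raw strings may differ in the test file; this just confirms the pattern accepts a well-formed sample and rejects a non-numeric one):

```
import re

regex = (
    r"""\{\n"""
    + r""" "name": "[\w]+",\n"""
    + r""" "population": [\d]+\n"""
    + r"""\}"""
)

good = '{\n "name": "Paris",\n "population": 2165423\n}'
bad = '{\n "name": "Paris",\n "population": about two million\n}'
print(bool(re.fullmatch(regex, good)))  # True
print(bool(re.fullmatch(regex, bad)))   # False
```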