Commit 6493256b authored by Lianmin Zheng

improve print

parent 06008bc2
@@ -4,7 +4,7 @@ from typing import Callable, List, Optional, Union
 import numpy as np
 from sglang.backend.base_backend import BaseBackend
-from sglang.lang.chat_template import get_chat_template_by_model_path, ChatTemplate
+from sglang.lang.chat_template import ChatTemplate, get_chat_template_by_model_path
 from sglang.lang.interpreter import StreamExecutor
 from sglang.lang.ir import SglSamplingParams
@@ -41,11 +41,15 @@ INSTRUCT_MODEL_NAMES = [
 class OpenAI(BaseBackend):
-    def __init__(self, model_name: str,
-                 is_chat_model: Optional[bool] = None,
-                 chat_template: Optional[ChatTemplate] = None,
-                 is_azure: bool = False,
-                 *args, **kwargs):
+    def __init__(
+        self,
+        model_name: str,
+        is_chat_model: Optional[bool] = None,
+        chat_template: Optional[ChatTemplate] = None,
+        is_azure: bool = False,
+        *args,
+        **kwargs,
+    ):
         super().__init__()

         if isinstance(openai, Exception):
@@ -63,7 +67,9 @@ class OpenAI(BaseBackend):
         self.tokenizer = tiktoken.get_encoding("cl100k_base")
         self.logit_bias_int = create_logit_bias_int(self.tokenizer)

-        self.chat_template = chat_template or get_chat_template_by_model_path(model_name)
+        self.chat_template = chat_template or get_chat_template_by_model_path(
+            model_name
+        )

         if is_chat_model is not None:
             self.is_chat_model = is_chat_model
@@ -208,6 +208,19 @@ class ModelRpcServer(rpyc.Service):
                     if self.out_pyobjs and self.running_batch.reqs[0].stream:
                         break

+                if self.running_batch is not None and self.tp_rank == 0:
+                    if self.decode_forward_ct % 40 == 0:
+                        num_used = self.max_total_num_token - (
+                            self.token_to_kv_pool.available_size()
+                            + self.tree_cache.evictable_size()
+                        )
+                        logger.info(
+                            f"#running-req: {len(self.running_batch.reqs)}, "
+                            f"#token: {num_used}, "
+                            f"token usage: {num_used / self.max_total_num_token:.2f}, "
+                            f"#queue-req: {len(self.forward_queue)}"
+                        )
             else:
                 # check the available size
                 available_size = (
@@ -221,19 +234,6 @@ class ModelRpcServer(rpyc.Service):
                         "KV cache pool leak detected!"
                     )

-        if self.running_batch is not None and self.tp_rank == 0:
-            if self.decode_forward_ct % 20 == 0:
-                num_used = self.max_total_num_token - (
-                    self.token_to_kv_pool.available_size()
-                    + self.tree_cache.evictable_size()
-                )
-                logger.info(
-                    f"#running-req: {len(self.running_batch.reqs)}, "
-                    f"#token: {num_used}, "
-                    f"token usage: {num_used / self.max_total_num_token:.2f}, "
-                    f"#queue-req: {len(self.forward_queue)}"
-                )
-
     def handle_generate_request(
         self,
         recv_req: TokenizedGenerateReqInput,
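For reference, the token-usage figure reported by the relocated log line is just capacity minus free and evictable tokens. A minimal, self-contained sketch of that arithmetic (the helper name and the sample numbers are illustrative, not part of this commit):

# Illustrative sketch, not part of the commit: mirrors the arithmetic behind
# the new logger.info() call, assuming max_total_num_token is the KV-cache
# capacity in tokens and the other two values are token counts.
def kv_usage(max_total_num_token: int, available: int, evictable: int):
    # Tokens currently held by running requests = capacity minus what is
    # free in the pool and what the radix cache could still evict.
    num_used = max_total_num_token - (available + evictable)
    return num_used, num_used / max_total_num_token

num_used, usage = kv_usage(10000, 3000, 1000)  # hypothetical numbers
print(f"#token: {num_used}, token usage: {usage:.2f}")  # -> #token: 6000, token usage: 0.60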