Unverified commit 26868443, authored by Liangsheng Yin and committed by GitHub
Browse files

Use min new token ratio at start (#701)

parent 824a77d0
@@ -161,15 +161,12 @@ class ModelTpServer:
         assert (
             server_args.schedule_conservativeness >= 0
         ), "Invalid schedule_conservativeness"
-        self.new_token_ratio = min(
-            global_config.base_new_token_ratio * server_args.schedule_conservativeness,
-            1.0,
-        )
         self.min_new_token_ratio = min(
             global_config.base_min_new_token_ratio
             * server_args.schedule_conservativeness,
             1.0,
         )
+        self.new_token_ratio = self.min_new_token_ratio
         self.new_token_ratio_decay = global_config.new_token_ratio_decay
         self.new_token_ratio_recovery = global_config.new_token_ratio_recovery
......
@@ -29,7 +29,7 @@ class ServerArgs:
     max_prefill_tokens: Optional[int] = None
     max_running_requests: Optional[int] = None
     schedule_heuristic: str = "lpm"
-    schedule_conservativeness: float = 0.8
+    schedule_conservativeness: float = 1.0
     # Other runtime options
     tp_size: int = 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment