Commit 6d25880c authored by zhuwenwen
Browse files

set default_max_num_batched_tokens = 10240

parent 1dc70839
@@ -1664,7 +1664,7 @@ class EngineArgs:
 # TODO(woosuk): Tune the default values for other hardware.
 default_max_num_batched_tokens = {
     UsageContext.LLM_CLASS: 8192,
-    UsageContext.OPENAI_API_SERVER: 2048,
+    UsageContext.OPENAI_API_SERVER: 10240,
 }
 default_max_num_seqs = {
     UsageContext.LLM_CLASS: 256,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment