Commit 6d25880c authored by zhuwenwen
Browse files

set default_max_num_batched_tokens = 10240

parent 1dc70839
......@@ -1664,7 +1664,7 @@ class EngineArgs:
# TODO(woosuk): Tune the default values for other hardware.
default_max_num_batched_tokens = {
UsageContext.LLM_CLASS: 8192,
- UsageContext.OPENAI_API_SERVER: 2048,
+ UsageContext.OPENAI_API_SERVER: 10240,
}
default_max_num_seqs = {
UsageContext.LLM_CLASS: 256,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment