Commit 98a008db authored by zhuwenwen
Browse files

set DEFAULT_MAX_NUM_BATCHED_TOKENS=10240

parent 2c8a16d6
......@@ -81,7 +81,7 @@ logger = init_logger(__name__)
# This value is chosen to have a balance between ITL and TTFT. Note it is
# not optimized for throughput.
-DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
+DEFAULT_MAX_NUM_BATCHED_TOKENS = 10240
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment