Unverified commit 619d3de8, authored by Chengji Yao, committed by GitHub
Browse files

[TPU] [V1] fix cases when max_num_reqs is set smaller than MIN_NUM_SEQS (#15583)


Signed-off-by: Chengji Yao <chengjiyao@google.com>
parent ecff8309
......@@ -14,10 +14,7 @@ answers = [
]
N = 1
# Currently, top-p sampling is disabled. `top_p` should be 1.0.
sampling_params = SamplingParams(temperature=0.7,
top_p=1.0,
n=N,
max_tokens=16)
sampling_params = SamplingParams(temperature=0, top_p=1.0, n=N, max_tokens=16)
# Set `enforce_eager=True` to avoid ahead-of-time compilation.
# In real workloads, `enforce_eager` should be `False`.
......
......@@ -88,7 +88,7 @@ class TPUModelRunner:
self.max_model_len = model_config.max_model_len
self.max_num_blocks_per_req = cdiv(self.max_model_len, self.block_size)
self.max_num_tokens = scheduler_config.max_num_batched_tokens
self.max_num_reqs = scheduler_config.max_num_seqs
self.max_num_reqs = max(scheduler_config.max_num_seqs, MIN_NUM_SEQS)
# Model-related.
self.num_attn_layers = model_config.get_num_layers_by_block_type(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment