Commit d8a060dd authored by zhuwenwen
Browse files

set default block_size to 16

parent 66092265
......@@ -1630,7 +1630,7 @@ PrefixCachingHashAlgo = Literal["builtin", "sha256", "sha256_cbor_64bit"]
class CacheConfig:
"""Configuration for the KV cache."""
block_size: BlockSize = None if not envs.VLLM_USE_FLASH_ATTN_PA else 64 # type: ignore
block_size: BlockSize = 16 if not envs.VLLM_USE_FLASH_ATTN_PA else 64 # type: ignore
"""Size of a contiguous cache block in number of tokens. This is ignored on
neuron devices and set to `--max-model-len`. On CUDA devices, only block
sizes up to 32 are supported. On HPU devices, block size defaults to 128.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment