Unverified commit 77b6e74f, authored by vllmellm and committed by GitHub
Browse files

[ROCm] Remove unnecessary assertion of max_model_len in ROCM_AITER_MLA attention backend. (#18938)


Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
parent 5acf828d
...@@ -132,8 +132,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): ...@@ -132,8 +132,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
def __init__(self, input_builder: "ModelInputForGPUBuilder"): def __init__(self, input_builder: "ModelInputForGPUBuilder"):
super().__init__(input_builder) super().__init__(input_builder)
assert self.runner.model_config.max_model_len == 32768,\
"AITER MLA requires max model len to be set to 32768"
assert self.block_size == 1, "AITER MLA requires only block size 1." assert self.block_size == 1, "AITER MLA requires only block size 1."
def prepare(self): def prepare(self):
......
...@@ -66,9 +66,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): ...@@ -66,9 +66,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
def __init__(self, runner, kv_cache_spec: AttentionSpec, def __init__(self, runner, kv_cache_spec: AttentionSpec,
block_table: BlockTable): block_table: BlockTable):
super().__init__(runner, kv_cache_spec, block_table) super().__init__(runner, kv_cache_spec, block_table)
max_model_len = self.runner.model_config.max_model_len
assert max_model_len == 32768,\
"AITER MLA requires max_model_len=32768"
assert self.kv_cache_spec.block_size == 1, "AITER MLA" \ assert self.kv_cache_spec.block_size == 1, "AITER MLA" \
"only supports block size 1." "only supports block size 1."
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment