Unverified commit b30dfa03, authored by Matthew Bonanni, committed by GitHub
Browse files

[Attention] Refactor CUDA attention backend selection logic (#24794)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
parent 2e78150d
...@@ -4371,7 +4371,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -4371,7 +4371,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
""" """
for backend in backends: for backend in backends:
is_supported = False is_supported = False
for supported_size in backend.get_supported_kernel_block_size(): for supported_size in backend.supported_kernel_block_sizes:
if isinstance(supported_size, int): if isinstance(supported_size, int):
if block_size == supported_size: if block_size == supported_size:
is_supported = True is_supported = True
...@@ -4402,7 +4402,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -4402,7 +4402,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
all_int_supported_sizes = set( all_int_supported_sizes = set(
supported_size supported_size
for backend in backends for backend in backends
for supported_size in backend.get_supported_kernel_block_size() for supported_size in backend.supported_kernel_block_sizes
if isinstance(supported_size, int) if isinstance(supported_size, int)
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment