Commit 65bb0ebc authored by zhuwenwen
Browse files

update utils.py

parent a3f4b5b8
......@@ -303,7 +303,7 @@ def bind_kv_cache(
# TODO - analyze where runner_kv_caches is used and the right
# way to ensure it properly reflects multiple attention layers
# in the same decoder block.
- if current_platform.is_cuda() or current_platform.is_xpu():
+ if current_platform.is_cuda() or current_platform.is_rocm() or current_platform.is_xpu():
# We know that the GPU runner is not impacted by this
# case. Some test code depends on runner_kv_caches, but
# not in a way that's impacted by ignoring this.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment