Unverified Commit fe3462c7 authored by Yan Ma, committed by GitHub
Browse files

[XPU][Bugfix] minor fix for XPU (#15591)


Signed-off-by: yan ma <yan.ma@intel.com>
parent 3b34fd52
...@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels. ...@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
- Second, install Python packages for vLLM XPU backend building: - Second, install Python packages for vLLM XPU backend building:
```console ```console
git clone https://github.com/vllm-project/vllm.git
cd vllm
pip install --upgrade pip pip install --upgrade pip
pip install -v -r requirements/xpu.txt pip install -v -r requirements/xpu.txt
``` ```
......
...@@ -220,8 +220,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]): ...@@ -220,8 +220,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
value_cache, value_cache,
attn_metadata.slot_mapping.flatten(), attn_metadata.slot_mapping.flatten(),
self.kv_cache_dtype, self.kv_cache_dtype,
layer._k_scale, layer._k_scale_float,
layer._v_scale, layer._v_scale_float,
) )
if attn_metadata.is_prompt: if attn_metadata.is_prompt:
...@@ -306,8 +306,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]): ...@@ -306,8 +306,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
max_seq_len, max_seq_len,
self.alibi_slopes, self.alibi_slopes,
self.kv_cache_dtype, self.kv_cache_dtype,
layer._k_scale, layer._k_scale_float,
layer._v_scale, layer._v_scale_float,
) )
else: else:
# Run PagedAttention V2. # Run PagedAttention V2.
...@@ -339,8 +339,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]): ...@@ -339,8 +339,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
max_seq_len, max_seq_len,
self.alibi_slopes, self.alibi_slopes,
self.kv_cache_dtype, self.kv_cache_dtype,
layer._k_scale, layer._k_scale_float,
layer._v_scale, layer._v_scale_float,
) )
# Reshape the output tensor. # Reshape the output tensor.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment