Unverified commit 2a4ec908, authored by Woosuk Kwon and committed by GitHub

Fix for breaking changes in xformers 0.0.21 (#834)

parent 85ebcda9
@@ -5,7 +5,7 @@ sentencepiece # Required for LLaMA tokenizer.
 numpy
 torch >= 2.0.0
 transformers >= 4.31.0 # Required for LLaMA-2.
-xformers >= 0.0.19
+xformers >= 0.0.21
 fastapi
 uvicorn
 pydantic < 2 # Required for OpenAI server.
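
As a quick sanity check on the new lower bound, the installed xformers version can be compared against the constraint. This snippet is added here for illustration only and is not part of the commit; it simply mirrors the `xformers >= 0.0.21` requirement above.

```python
# Illustrative check that the installed xformers satisfies the new lower bound.
import xformers
from packaging.version import Version

installed = Version(xformers.__version__)
assert installed >= Version("0.0.21"), (
    f"xformers {installed} is older than 0.0.21; the updated attention-bias "
    "code path expects the newer release."
)
```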
@@ -357,11 +357,12 @@ class PagedAttentionWithALiBi(PagedAttention):
             # be sliced from a tensor whose length is a multiple of 8.
             padded_len = (prompt_len + 7) // 8 * 8
             bias = torch.empty(
+                1,  # batch_size
                 self.num_heads,
-                padded_len,
+                prompt_len,
                 padded_len,
                 device=self.alibi_slopes.device,
-            )[:, :prompt_len, :prompt_len].copy_(bias)
+            )[:, :, :, :prompt_len].copy_(bias)
             bias.mul_(self.alibi_slopes[:, None, None])
             attn_bias = LowerTriangularMaskWithTensorBias(bias)
             input_metadata.attn_bias.append(attn_bias)
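
For context, the hunk above adapts to xformers 0.0.21 expecting the custom attention bias as a 4-D tensor of shape (batch, heads, q_len, padded_k_len), where only the key dimension is padded to a multiple of 8, instead of the 3-D (heads, padded_len, padded_len) layout used with 0.0.19. The following is a minimal, self-contained sketch of the new layout, not the vLLM implementation itself: `num_heads`, `prompt_len`, and `alibi_slopes` are placeholder values, and the import path for `LowerTriangularMaskWithTensorBias` is assumed from xformers' fmha module rather than shown in this hunk.

```python
# Sketch of the ALiBi bias construction expected by xformers >= 0.0.21.
import torch
from xformers.ops.fmha.attn_bias import LowerTriangularMaskWithTensorBias

num_heads = 8                          # placeholder
prompt_len = 13                        # placeholder
alibi_slopes = torch.rand(num_heads)   # placeholder ALiBi slopes

# Relative-position matrix: entry (i, j) holds j - i.
bias = torch.arange(prompt_len)
bias = (bias[None, :] - bias[:, None]).float()

# The bias must be sliced from a tensor whose last dimension is a multiple
# of 8, and (as of 0.0.21) carries an explicit batch dimension:
# (batch, heads, q_len, padded_k_len).
padded_len = (prompt_len + 7) // 8 * 8
bias = torch.empty(
    1,            # batch_size
    num_heads,
    prompt_len,
    padded_len,
)[:, :, :, :prompt_len].copy_(bias)

# Scale each head by its ALiBi slope (broadcast over the last two dims).
bias.mul_(alibi_slopes[:, None, None])

attn_bias = LowerTriangularMaskWithTensorBias(bias)
```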