"vllm/engine/tokenizer_utils.py" did not exist on "e86717833da1216222cf0d490c2e3ba198610b13"
Commit 85e8224c authored by zhuwenwen
Browse files

update paged_attn.py

parent b3ab1cdc
......@@ -201,7 +201,7 @@ class PagedAttention:
print(f"query.shape = {query.shape}, key_cache.shape = {key_cache.shape}, value_cache.shape = {value_cache.shape}")
print(f"num_kv_heads = {num_kv_heads}, scale = {scale:.3f}, block_tables.shape = {block_tables.shape}, seq_lens.shape = {seq_lens.shape}, block_size = {block_size}, max_seq_len = {max_seq_len}")
-if envs.VLLM_USE_OPT_OP:
+if envs.VLLM_USE_OPT_OP and max_seq_len<8192:
ops.paged_attention_v2_opt(
output,
exp_sums,
......
......@@ -358,6 +358,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA):
self.use_llama_nn = os.environ.get('LLAMA_NN') == '1'
self.use_gemm_pad = os.environ.get('GEMM_PAD') == '1'
self.use_fa_pad = os.environ.get('FA_PAD') == '1'
self.use_awq_pad = os.environ.get('AWQ_PAD') == '1'
def forward(
self,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment