Commit aad58f06 authored by zhuwenwen
Browse files

Merge branch 'v0.6.2-dev_wm' into 'v0.6.2-dev'

修复单测中test_long_context、test_attention部分错误

See merge request dcutoolkit/deeplearing/vllm!33
parents 70c661da 137e8a16
...@@ -206,7 +206,7 @@ def test_paged_attention( ...@@ -206,7 +206,7 @@ def test_paged_attention(
opcheck(torch.ops._C.paged_attention_v1, opcheck(torch.ops._C.paged_attention_v1,
(output, query, key_cache, value_cache, num_kv_heads, scale, (output, query, key_cache, value_cache, num_kv_heads, scale,
block_tables, seq_lens, block_size, max_seq_len, alibi_slopes, block_tables, seq_lens, block_size, max_seq_len, alibi_slopes,
kv_cache_dtype, k_scale, v_scale, 0, 0, 0, 64, 0), kv_cache_dtype, k_scale, v_scale, 0, 0, 0, 64, 0, None, 0),
cond=(head_size == HEAD_SIZES[0] cond=(head_size == HEAD_SIZES[0]
and block_size == BLOCK_SIZES[0])) and block_size == BLOCK_SIZES[0]))
...@@ -248,7 +248,7 @@ def test_paged_attention( ...@@ -248,7 +248,7 @@ def test_paged_attention(
(output, exp_sums, max_logits, tmp_output, query, (output, exp_sums, max_logits, tmp_output, query,
key_cache, value_cache, num_kv_heads, scale, block_tables, key_cache, value_cache, num_kv_heads, scale, block_tables,
seq_lens, block_size, max_seq_len, alibi_slopes, seq_lens, block_size, max_seq_len, alibi_slopes,
kv_cache_dtype, k_scale, v_scale, 0, 0, 0, 64, 0), kv_cache_dtype, k_scale, v_scale, 0, 0, 0, 64, 0, None, 0),
cond=(head_size == HEAD_SIZES[0] cond=(head_size == HEAD_SIZES[0]
and block_size == BLOCK_SIZES[0])) and block_size == BLOCK_SIZES[0]))
......
...@@ -111,7 +111,7 @@ def lora_llm(long_context_infos): ...@@ -111,7 +111,7 @@ def lora_llm(long_context_infos):
llm = vllm.LLM("meta-llama/Llama-2-13b-chat-hf", llm = vllm.LLM("meta-llama/Llama-2-13b-chat-hf",
enable_lora=True, enable_lora=True,
max_num_seqs=16, max_num_seqs=16,
max_loras=2, max_loras=8,
long_lora_scaling_factors=tuple(scaling_factors), long_lora_scaling_factors=tuple(scaling_factors),
max_num_batched_tokens=4096 * 8, max_num_batched_tokens=4096 * 8,
tensor_parallel_size=4, tensor_parallel_size=4,
......
...@@ -20,7 +20,7 @@ from vllm.utils import seed_everything ...@@ -20,7 +20,7 @@ from vllm.utils import seed_everything
from .utils import (generate_data, generate_data_for_expand_nslices, from .utils import (generate_data, generate_data_for_expand_nslices,
ref_torch_groupgemm) ref_torch_groupgemm)
HIDDEN_SIZES = [4097] HIDDEN_SIZES = [1024]
BATCHES = [1, 4, 16, 32] BATCHES = [1, 4, 16, 32]
NUM_LORA = [1, 8, 32, 128] NUM_LORA = [1, 8, 32, 128]
......
...@@ -83,7 +83,9 @@ class LoRARequest( ...@@ -83,7 +83,9 @@ class LoRARequest(
and comparison lora adapter across engines. and comparison lora adapter across engines.
""" """
return isinstance(value, return isinstance(value,
self.__class__) and self.lora_name == value.lora_name self.__class__) and self.lora_name == value.lora_name and \
self.lora_int_id == value.lora_int_id and \
self.lora_path == value.lora_path
def __hash__(self) -> int: def __hash__(self) -> int:
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment