Commit 0ff29dbf authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds

parents e0ba23b5 8c0143db
......@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
direct_register_custom_op(
op_name="rotary_embedding_deepseek_fuse",
op_func=rotary_embedding_deepseek_fuse,
mutates_args=[],
mutates_args=["query", "key"],
fake_impl=rotary_embedding_deepseek_fuse_fake,
)
......@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
BLOCK_SIZE=BLOCK_SIZE,
num_warps=1)
# if envs.VLLM_USE_LIGHTOP:
if False:
if envs.VLLM_USE_LIGHTOP:
torch.ops.vllm.rotary_embedding_deepseek_fuse(positions, query, key, self.head_size, self.cos_sin_cache, self.is_neox_style)
else:
call(query)
......
......@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
from vllm.utils import SUPPORT_TC
if not SUPPORT_TC:
os.environ['VLLM_USE_V1'] = '0'
os.environ['VLLM_USE_FLASH_ATTN_PA'] = '0'
os.environ['VLLM_USE_FLASH_MLA'] = '0'
if TYPE_CHECKING:
from vllm.config import ModelConfig, VllmConfig
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment