Commit 77f7bb45 authored by zhuwenwen
Browse files

Merge branch 'v0.8.2-ori-wm' into 'v0.8.2-ori'

[fix]修复模型注册失败及其他报错

See merge request dcutoolkit/deeplearing/vllm!92
parents 31f6b24f 1a397b82
...@@ -814,8 +814,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor, ...@@ -814,8 +814,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
sorted_token_ids, sorted_token_ids,
expert_ids, expert_ids,
num_tokens_post_padded, num_tokens_post_padded,
B.shape[1] if not use_nn_moe else B.shape[2], B.shape[1] if not use_nn_moe else B.shape[2],
A.shape[2], A.shape[1],
EM, EM,
topk_ids.numel(), topk_ids.numel(),
A.stride(0), A.stride(0),
...@@ -1178,7 +1178,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor, ...@@ -1178,7 +1178,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
w2: torch.Tensor, w2: torch.Tensor,
topk_weights: torch.Tensor, topk_weights: torch.Tensor,
topk_ids: torch.Tensor, topk_ids: torch.Tensor,
activation: str = "silu", activation: Optional[str] = None,
use_fp8_w8a8: bool = False, use_fp8_w8a8: bool = False,
use_int8_w8a16: bool = False, use_int8_w8a16: bool = False,
use_int4_w4a16: bool = False, use_int4_w4a16: bool = False,
...@@ -1205,7 +1205,7 @@ def inplace_fused_experts_fake( ...@@ -1205,7 +1205,7 @@ def inplace_fused_experts_fake(
w2: torch.Tensor, w2: torch.Tensor,
topk_weights: torch.Tensor, topk_weights: torch.Tensor,
topk_ids: torch.Tensor, topk_ids: torch.Tensor,
activation: str = "silu", activation: Optional[str] = None,
use_fp8_w8a8: bool = False, use_fp8_w8a8: bool = False,
use_int8_w8a16: bool = False, use_int8_w8a16: bool = False,
use_int4_w4a16: bool = False, use_int4_w4a16: bool = False,
...@@ -1218,7 +1218,7 @@ def inplace_fused_experts_fake( ...@@ -1218,7 +1218,7 @@ def inplace_fused_experts_fake(
a1_scale: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None,
a2_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None,
block_shape: Optional[List[int]] = None, block_shape: Optional[List[int]] = None,
use_nn_moe: Optional[bool] = False,) -> None: use_nn_moe: Optional[bool] = False) -> None:
pass pass
...@@ -1236,7 +1236,7 @@ def outplace_fused_experts( ...@@ -1236,7 +1236,7 @@ def outplace_fused_experts(
w2: torch.Tensor, w2: torch.Tensor,
topk_weights: torch.Tensor, topk_weights: torch.Tensor,
topk_ids: torch.Tensor, topk_ids: torch.Tensor,
activation: str = "silu", activation: Optional[str] = None,
use_fp8_w8a8: bool = False, use_fp8_w8a8: bool = False,
use_int8_w8a16: bool = False, use_int8_w8a16: bool = False,
use_int4_w4a16: bool = False, use_int4_w4a16: bool = False,
...@@ -1263,7 +1263,7 @@ def outplace_fused_experts_fake( ...@@ -1263,7 +1263,7 @@ def outplace_fused_experts_fake(
w2: torch.Tensor, w2: torch.Tensor,
topk_weights: torch.Tensor, topk_weights: torch.Tensor,
topk_ids: torch.Tensor, topk_ids: torch.Tensor,
activation: str = "silu", activation: Optional[str] = None,
use_fp8_w8a8: bool = False, use_fp8_w8a8: bool = False,
use_int8_w8a16: bool = False, use_int8_w8a16: bool = False,
use_int4_w4a16: bool = False, use_int4_w4a16: bool = False,
......
...@@ -685,7 +685,7 @@ class FusedMoE(torch.nn.Module): ...@@ -685,7 +685,7 @@ class FusedMoE(torch.nn.Module):
# is_transposed: if the dim to shard the weight # is_transposed: if the dim to shard the weight
# should be flipped. Required by GPTQ, compressed-tensors # should be flipped. Required by GPTQ, compressed-tensors
# should be whatever dimension intermediate_size_per_partition is # should be whatever dimension intermediate_size_per_partition is
s_transposed = getattr(param, "is_transposed", False) or self.use_nn_moe is_transposed = getattr(param, "is_transposed", False) or self.use_nn_moe
shard_dim = SHARD_ID_TO_SHARDED_DIM[shard_id] shard_dim = SHARD_ID_TO_SHARDED_DIM[shard_id]
if is_transposed: if is_transposed:
shard_dim = int(not shard_dim) shard_dim = int(not shard_dim)
......
...@@ -61,12 +61,12 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = { ...@@ -61,12 +61,12 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
} }
# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES`` # Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES``
if "HIP_VISIBLE_DEVICES" in os.environ: # if "HIP_VISIBLE_DEVICES" in os.environ:
val = os.environ["HIP_VISIBLE_DEVICES"] # val = os.environ["HIP_VISIBLE_DEVICES"]
if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None): # if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None):
assert val == cuda_val # assert val == cuda_val
else: # else:
os.environ["CUDA_VISIBLE_DEVICES"] = val # os.environ["CUDA_VISIBLE_DEVICES"] = val
# AMDSMI utils # AMDSMI utils
# Note that NVML is not affected by `{CUDA/HIP}_VISIBLE_DEVICES`, # Note that NVML is not affected by `{CUDA/HIP}_VISIBLE_DEVICES`,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment