Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
77f7bb45
Commit
77f7bb45
authored
Apr 01, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.8.2-ori-wm' into 'v0.8.2-ori'
[fix]修复模型注册失败及其他报错 See merge request dcutoolkit/deeplearing/vllm!92
parents
31f6b24f
1a397b82
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
14 deletions
+14
-14
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+7
-7
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+1
-1
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+6
-6
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
77f7bb45
...
...
@@ -815,7 +815,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
expert_ids
,
num_tokens_post_padded
,
B
.
shape
[
1
]
if
not
use_nn_moe
else
B
.
shape
[
2
],
A
.
shape
[
2
],
A
.
shape
[
1
],
EM
,
topk_ids
.
numel
(),
A
.
stride
(
0
),
...
...
@@ -1178,7 +1178,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
w2
:
torch
.
Tensor
,
topk_weights
:
torch
.
Tensor
,
topk_ids
:
torch
.
Tensor
,
activation
:
str
=
"silu"
,
activation
:
Optional
[
str
]
=
None
,
use_fp8_w8a8
:
bool
=
False
,
use_int8_w8a16
:
bool
=
False
,
use_int4_w4a16
:
bool
=
False
,
...
...
@@ -1205,7 +1205,7 @@ def inplace_fused_experts_fake(
w2
:
torch
.
Tensor
,
topk_weights
:
torch
.
Tensor
,
topk_ids
:
torch
.
Tensor
,
activation
:
str
=
"silu"
,
activation
:
Optional
[
str
]
=
None
,
use_fp8_w8a8
:
bool
=
False
,
use_int8_w8a16
:
bool
=
False
,
use_int4_w4a16
:
bool
=
False
,
...
...
@@ -1218,7 +1218,7 @@ def inplace_fused_experts_fake(
a1_scale
:
Optional
[
torch
.
Tensor
]
=
None
,
a2_scale
:
Optional
[
torch
.
Tensor
]
=
None
,
block_shape
:
Optional
[
List
[
int
]]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
)
->
None
:
use_nn_moe
:
Optional
[
bool
]
=
False
)
->
None
:
pass
...
...
@@ -1236,7 +1236,7 @@ def outplace_fused_experts(
w2
:
torch
.
Tensor
,
topk_weights
:
torch
.
Tensor
,
topk_ids
:
torch
.
Tensor
,
activation
:
str
=
"silu"
,
activation
:
Optional
[
str
]
=
None
,
use_fp8_w8a8
:
bool
=
False
,
use_int8_w8a16
:
bool
=
False
,
use_int4_w4a16
:
bool
=
False
,
...
...
@@ -1263,7 +1263,7 @@ def outplace_fused_experts_fake(
w2
:
torch
.
Tensor
,
topk_weights
:
torch
.
Tensor
,
topk_ids
:
torch
.
Tensor
,
activation
:
str
=
"silu"
,
activation
:
Optional
[
str
]
=
None
,
use_fp8_w8a8
:
bool
=
False
,
use_int8_w8a16
:
bool
=
False
,
use_int4_w4a16
:
bool
=
False
,
...
...
vllm/model_executor/layers/fused_moe/layer.py
View file @
77f7bb45
...
...
@@ -685,7 +685,7 @@ class FusedMoE(torch.nn.Module):
# is_transposed: whether the dim used to shard the weight
# should be flipped. Required by GPTQ and compressed-tensors;
# the shard dim should be whatever dimension intermediate_size_per_partition is.
s_transposed
=
getattr
(
param
,
"is_transposed"
,
False
)
or
self
.
use_nn_moe
i
s_transposed
=
getattr
(
param
,
"is_transposed"
,
False
)
or
self
.
use_nn_moe
shard_dim
=
SHARD_ID_TO_SHARDED_DIM
[
shard_id
]
if
is_transposed
:
shard_dim
=
int
(
not
shard_dim
)
...
...
vllm/platforms/rocm.py
View file @
77f7bb45
...
...
@@ -61,12 +61,12 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
}
# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES`
if
"HIP_VISIBLE_DEVICES"
in
os
.
environ
:
val
=
os
.
environ
[
"HIP_VISIBLE_DEVICES"
]
if
cuda_val
:
=
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
None
):
assert
val
==
cuda_val
else
:
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
]
=
val
#
if "HIP_VISIBLE_DEVICES" in os.environ:
#
val = os.environ["HIP_VISIBLE_DEVICES"]
#
if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None):
#
assert val == cuda_val
#
else:
#
os.environ["CUDA_VISIBLE_DEVICES"] = val
# AMDSMI utils
# Note that NVML is not affected by `{CUDA/HIP}_VISIBLE_DEVICES`,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment