Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28021d6e
Commit
28021d6e
authored
Oct 31, 2025
by
zhuwenwen
Browse files
update moe configs name
parent
653b799b
Changes
68
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
8 additions
and
151 deletions
+8
-151
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=BW3000.json
...yers/fused_moe/configs/E=8,N=3584,device_name=BW3000.json
+0
-146
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx928_120cu.json
...used_moe/configs/E=8,N=3584,device_name=gfx928_120cu.json
+0
-0
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx928_120cu_nn.json
...d_moe/configs/E=8,N=3584,device_name=gfx928_120cu_nn.json
+0
-0
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx936_80cu.json
...fused_moe/configs/E=8,N=3584,device_name=gfx936_80cu.json
+0
-0
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx936_80cu_nn.json
...ed_moe/configs/E=8,N=3584,device_name=gfx936_80cu_nn.json
+0
-0
vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=gfx928_120cu.json
...used_moe/configs/E=8,N=7168,device_name=gfx928_120cu.json
+0
-0
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+6
-3
vllm/utils/__init__.py
vllm/utils/__init__.py
+2
-2
No files found.
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=BW3000.json
deleted
100644 → 0
View file @
653b799b
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"1536"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
1
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
16
,
"num_warps"
:
4
,
"num_stages"
:
2
},
"3072"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
16
,
"num_warps"
:
8
,
"num_stages"
:
2
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
16
,
"num_warps"
:
8
,
"num_stages"
:
2
}
}
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=K100_AI.json
→
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx928_120cu.json
View file @
28021d6e
File moved
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=K100_AI_nn.json
→
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx928_120cu_nn.json
View file @
28021d6e
File moved
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=BW200.json
→
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx936_80cu.json
View file @
28021d6e
File moved
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=BW200_nn.json
→
vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=gfx936_80cu_nn.json
View file @
28021d6e
File moved
vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=K100_AI.json
→
vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=gfx928_120cu.json
View file @
28021d6e
File moved
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
28021d6e
...
...
@@ -59,6 +59,9 @@ logger = init_logger(__name__)
if
envs
.
VLLM_USE_GLOBAL_CACHE13
:
moe_cache_singleton
=
None
arch_name
=
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
.
split
(
':'
)[
0
]
arch_cu
=
torch
.
cuda
.
get_device_properties
(
torch
.
cuda
.
current_device
()).
multi_processor_count
@
torch
.
compile
def
moe_sum_reduce_torch_compile
(
x
,
out
,
routed_scaling_factor
):
...
...
@@ -1091,14 +1094,14 @@ def get_config_file_name(E: int,
N
:
int
,
dtype
:
Optional
[
str
],
block_shape
:
Optional
[
List
[
int
]]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
)
->
str
:
device_name
=
current_platform
.
get_device_name
().
replace
(
" "
,
"_"
)
# device_name = current_platform.get_device_name().replace(" ", "_")
dtype_selector
=
""
if
not
dtype
else
f
",dtype=
{
dtype
}
"
block_shape_selector
=
(
""
if
not
block_shape
or
not
all
(
block_shape
)
else
f
",block_shape=
{
block_shape
}
"
).
replace
(
" "
,
""
)
if
not
use_nn_moe
:
return
f
"E=
{
E
}
,N=
{
N
}
,device_name=
{
device_name
}
{
dtype_selector
}{
block_shape_selector
}
.json"
# noqa: E501
return
f
"E=
{
E
}
,N=
{
N
}
,device_name=
{
arch_name
}
_
{
arch_cu
}
cu
{
dtype_selector
}{
block_shape_selector
}
.json"
else
:
return
f
"E=
{
E
}
,N=
{
N
}
,device_name=
{
device_name
}
{
dtype_selector
}{
block_shape_selector
}
_nn.json"
return
f
"E=
{
E
}
,N=
{
N
}
,device_name=
{
arch_name
}
_
{
arch_cu
}
cu
{
dtype_selector
}{
block_shape_selector
}
_nn.json"
# Adapted from: https://github.com/sgl-project/sglang/pull/2628
@
functools
.
lru_cache
...
...
vllm/utils/__init__.py
View file @
28021d6e
...
...
@@ -92,8 +92,8 @@ DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS
=
32768
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS
=
5120
gpuname
=
torch
.
cuda
.
get_device_properties
(
torch
.
cuda
.
current_device
()).
name
SUPPORT_TC
=
gpuname
.
startswith
(
'K100_AI'
)
or
gpuname
.
startswith
(
'K500SM_AI'
)
or
gpuname
.
startswith
(
'BW'
)
GPU_ARCH
=
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
SUPPORT_TC
=
any
(
arch
in
GPU_ARCH
for
arch
in
[
"gfx928"
,
"gfx936"
]
)
# Constants related to forcing the attention backend selection
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment