Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
aa0addb3
Unverified
Commit
aa0addb3
authored
Oct 29, 2024
by
Yongzao
Committed by
GitHub
Oct 28, 2024
Browse files
Adding "torch compile" annotations to moe models (#9758)
parent
5f8d8075
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
0 deletions
+8
-0
vllm/model_executor/models/arctic.py
vllm/model_executor/models/arctic.py
+2
-0
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+2
-0
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/olmoe.py
+2
-0
vllm/model_executor/models/phimoe.py
vllm/model_executor/models/phimoe.py
+2
-0
No files found.
vllm/model_executor/models/arctic.py
View file @
aa0addb3
...
...
@@ -5,6 +5,7 @@ import torch
from
torch
import
nn
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
(
get_pp_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
...
...
@@ -360,6 +361,7 @@ class ArcticDecoderLayer(nn.Module):
return
hidden_states
@
support_torch_compile
class
ArcticModel
(
nn
.
Module
):
def
__init__
(
...
...
vllm/model_executor/models/mixtral.py
View file @
aa0addb3
...
...
@@ -28,6 +28,7 @@ from torch import nn
from
transformers
import
MixtralConfig
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
LoRAConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
...
...
@@ -245,6 +246,7 @@ class MixtralDecoderLayer(nn.Module):
return
hidden_states
,
residual
@
support_torch_compile
class
MixtralModel
(
nn
.
Module
):
def
__init__
(
...
...
vllm/model_executor/models/olmoe.py
View file @
aa0addb3
...
...
@@ -17,6 +17,7 @@ from torch import nn
from
transformers
import
PretrainedConfig
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
...
...
@@ -239,6 +240,7 @@ class OlmoeDecoderLayer(nn.Module):
return
hidden_states
,
residual
@
support_torch_compile
class
OlmoeModel
(
nn
.
Module
):
def
__init__
(
...
...
vllm/model_executor/models/phimoe.py
View file @
aa0addb3
...
...
@@ -28,6 +28,7 @@ from torch import nn
from
transformers.configuration_utils
import
PretrainedConfig
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
LoRAConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
...
...
@@ -429,6 +430,7 @@ class PhiMoEDecoderLayer(nn.Module):
return
hidden_states
,
residual
@
support_torch_compile
class
PhiMoEModel
(
nn
.
Module
):
def
__init__
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment