Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9013e24f
Unverified
Commit
9013e24f
authored
Oct 24, 2024
by
Yongzao
Committed by
GitHub
Oct 23, 2024
Browse files
[torch.compile] Adding torch compile annotations to some models (#9614)
parent
fd0e2cfd
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing 6 changed files with 12 additions and 0 deletions
+12
-0
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+2
-0
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+2
-0
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+2
-0
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone.py
+2
-0
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+2
-0
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+2
-0
No files found.
vllm/model_executor/models/baichuan.py
View file @
9013e24f
...
...
@@ -26,6 +26,7 @@ from torch import nn
from transformers import PretrainedConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, LoRAConfig
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size)
...
...
@@ -250,6 +251,7 @@ class BaiChuanDecoderLayer(nn.Module):
        return hidden_states, residual


@support_torch_compile
class BaiChuanModel(nn.Module):

    def __init__(self,
...
...
vllm/model_executor/models/bloom.py
View file @
9013e24f
...
...
@@ -24,6 +24,7 @@ from torch import nn
from transformers import BloomConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size)
...
...
@@ -218,6 +219,7 @@ class BloomBlock(nn.Module):
        return output


@support_torch_compile
class BloomModel(nn.Module):

    def __init__(
...
...
vllm/model_executor/models/commandr.py
View file @
9013e24f
...
...
@@ -28,6 +28,7 @@ from torch import nn
from transformers import CohereConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, LoRAConfig
from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
from vllm.model_executor.layers.activation import SiluAndMul
...
...
@@ -250,6 +251,7 @@ class CohereDecoderLayer(nn.Module):
        return hidden_states, residual


@support_torch_compile
class CohereModel(nn.Module):

    def __init__(
...
...
vllm/model_executor/models/exaone.py
View file @
9013e24f
...
...
@@ -29,6 +29,7 @@ import torch
from torch import nn

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, LoRAConfig
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size)
...
...
@@ -311,6 +312,7 @@ class ExaoneDecoderLayer(nn.Module):
        return hidden_states, residual


@support_torch_compile
class ExaoneModel(nn.Module):

    def __init__(
...
...
vllm/model_executor/models/gemma.py
View file @
9013e24f
...
...
@@ -22,6 +22,7 @@ from torch import nn
from transformers import GemmaConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, LoRAConfig
from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
from vllm.logger import init_logger
...
...
@@ -239,6 +240,7 @@ class GemmaDecoderLayer(nn.Module):
        return hidden_states, residual


@support_torch_compile
class GemmaModel(nn.Module):

    def __init__(
...
...
vllm/model_executor/models/gpt2.py
View file @
9013e24f
...
...
@@ -24,6 +24,7 @@ from torch import nn
from transformers import GPT2Config

from vllm.attention import Attention, AttentionMetadata
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig
from vllm.distributed.parallel_state import (
    get_pp_group, get_tensor_model_parallel_world_size)
...
...
@@ -182,6 +183,7 @@ class GPT2Block(nn.Module):
        return hidden_states


@support_torch_compile
class GPT2Model(nn.Module):

    def __init__(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment