Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
a2a3afbc
Unverified
Commit
a2a3afbc
authored
Sep 14, 2022
by
Sylvain Gugger
Committed by
GitHub
Sep 14, 2022
Browse files
PyTorch >= 1.7.0 and TensorFlow >= 2.4.0 (#19016)
parent
9f4acd05
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
73 additions
and
216 deletions
+73
-216
setup.py
setup.py
+2
-2
src/transformers/activations.py
src/transformers/activations.py
+2
-12
src/transformers/dependency_versions_table.py
src/transformers/dependency_versions_table.py
+2
-2
src/transformers/models/albert/modeling_albert.py
src/transformers/models/albert/modeling_albert.py
+4
-12
src/transformers/models/bert/modeling_bert.py
src/transformers/models/bert/modeling_bert.py
+4
-12
src/transformers/models/big_bird/modeling_big_bird.py
src/transformers/models/big_bird/modeling_big_bird.py
+4
-7
src/transformers/models/convbert/modeling_convbert.py
src/transformers/models/convbert/modeling_convbert.py
+4
-12
src/transformers/models/data2vec/modeling_data2vec_text.py
src/transformers/models/data2vec/modeling_data2vec_text.py
+4
-12
src/transformers/models/decision_transformer/modeling_decision_transformer.py
...els/decision_transformer/modeling_decision_transformer.py
+4
-21
src/transformers/models/distilbert/modeling_distilbert.py
src/transformers/models/distilbert/modeling_distilbert.py
+4
-10
src/transformers/models/electra/modeling_electra.py
src/transformers/models/electra/modeling_electra.py
+4
-12
src/transformers/models/ernie/modeling_ernie.py
src/transformers/models/ernie/modeling_ernie.py
+4
-12
src/transformers/models/flaubert/modeling_flaubert.py
src/transformers/models/flaubert/modeling_flaubert.py
+3
-5
src/transformers/models/flava/modeling_flava.py
src/transformers/models/flava/modeling_flava.py
+3
-7
src/transformers/models/fnet/modeling_fnet.py
src/transformers/models/fnet/modeling_fnet.py
+4
-7
src/transformers/models/gpt2/modeling_gpt2.py
src/transformers/models/gpt2/modeling_gpt2.py
+3
-20
src/transformers/models/imagegpt/modeling_imagegpt.py
src/transformers/models/imagegpt/modeling_imagegpt.py
+3
-20
src/transformers/models/mctct/modeling_mctct.py
src/transformers/models/mctct/modeling_mctct.py
+5
-7
src/transformers/models/nezha/modeling_nezha.py
src/transformers/models/nezha/modeling_nezha.py
+4
-12
src/transformers/models/nystromformer/modeling_nystromformer.py
...ansformers/models/nystromformer/modeling_nystromformer.py
+6
-12
No files found.
setup.py
View file @
a2a3afbc
...
...
@@ -155,13 +155,13 @@ _deps = [
"librosa"
,
"starlette"
,
"tensorflow-cpu>=2.3"
,
"tensorflow>=2.
3
"
,
"tensorflow>=2.
4
"
,
"tensorflow-text"
,
"tf2onnx"
,
"timeout-decorator"
,
"timm"
,
"tokenizers>=0.11.1,!=0.11.3,<0.13"
,
"torch>=1.
0
,!=
0
.12.0"
,
"torch>=1.
7
,!=
1
.12.0"
,
"torchaudio"
,
"pyctcdecode>=0.3.0"
,
"tqdm>=4.27"
,
...
...
src/transformers/activations.py
View file @
a2a3afbc
...
...
@@ -44,7 +44,7 @@ class GELUActivation(nn.Module):
def
__init__
(
self
,
use_gelu_python
:
bool
=
False
):
super
().
__init__
()
if
version
.
parse
(
version
.
parse
(
torch
.
__version__
).
base_version
)
<
version
.
parse
(
"1.4"
)
or
use_gelu_python
:
if
use_gelu_python
:
self
.
act
=
self
.
_gelu_python
else
:
self
.
act
=
nn
.
functional
.
gelu
...
...
@@ -108,18 +108,8 @@ class SiLUActivation(nn.Module):
later.
"""
def
__init__
(
self
):
super
().
__init__
()
if
version
.
parse
(
version
.
parse
(
torch
.
__version__
).
base_version
)
<
version
.
parse
(
"1.7"
):
self
.
act
=
self
.
_silu_python
else
:
self
.
act
=
nn
.
functional
.
silu
def
_silu_python
(
self
,
input
:
Tensor
)
->
Tensor
:
return
input
*
torch
.
sigmoid
(
input
)
def
forward
(
self
,
input
:
Tensor
)
->
Tensor
:
return
self
.
act
(
input
)
return
nn
.
functional
.
silu
(
input
)
class
MishActivation
(
nn
.
Module
):
...
...
src/transformers/dependency_versions_table.py
View file @
a2a3afbc
...
...
@@ -61,13 +61,13 @@ deps = {
"librosa"
:
"librosa"
,
"starlette"
:
"starlette"
,
"tensorflow-cpu"
:
"tensorflow-cpu>=2.3"
,
"tensorflow"
:
"tensorflow>=2.
3
"
,
"tensorflow"
:
"tensorflow>=2.
4
"
,
"tensorflow-text"
:
"tensorflow-text"
,
"tf2onnx"
:
"tf2onnx"
,
"timeout-decorator"
:
"timeout-decorator"
,
"timm"
:
"timm"
,
"tokenizers"
:
"tokenizers>=0.11.1,!=0.11.3,<0.13"
,
"torch"
:
"torch>=1.
0
,!=
0
.12.0"
,
"torch"
:
"torch>=1.
7
,!=
1
.12.0"
,
"torchaudio"
:
"torchaudio"
,
"pyctcdecode"
:
"pyctcdecode>=0.3.0"
,
"tqdm"
:
"tqdm>=4.27"
,
...
...
src/transformers/models/albert/modeling_albert.py
View file @
a2a3afbc
...
...
@@ -34,12 +34,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -216,12 +211,9 @@ class AlbertEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
# Copied from transformers.models.bert.modeling_bert.BertEmbeddings.forward
def
forward
(
...
...
src/transformers/models/bert/modeling_bert.py
View file @
a2a3afbc
...
...
@@ -40,12 +40,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -199,12 +194,9 @@ class BertEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
...
...
src/transformers/models/big_bird/modeling_big_bird.py
View file @
a2a3afbc
...
...
@@ -37,7 +37,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
apply_chunking_to_forward
,
is_torch_greater_than_1_6
from
...pytorch_utils
import
apply_chunking_to_forward
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -259,12 +259,9 @@ class BigBirdEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
# End copy
self
.
rescale_embeddings
=
config
.
rescale_embeddings
...
...
src/transformers/models/convbert/modeling_convbert.py
View file @
a2a3afbc
...
...
@@ -35,12 +35,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
,
SequenceSummary
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
add_code_sample_docstrings
,
add_start_docstrings
,
add_start_docstrings_to_model_forward
,
logging
from
.configuration_convbert
import
ConvBertConfig
...
...
@@ -198,12 +193,9 @@ class ConvBertEmbeddings(nn.Module):
self
.
dropout
=
nn
.
Dropout
(
config
.
hidden_dropout_prob
)
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
...
...
src/transformers/models/data2vec/modeling_data2vec_text.py
View file @
a2a3afbc
...
...
@@ -34,12 +34,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
add_code_sample_docstrings
,
add_start_docstrings
,
...
...
@@ -87,12 +82,9 @@ class Data2VecTextForTextEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
# End copy
self
.
padding_idx
=
config
.
pad_token_id
...
...
src/transformers/models/decision_transformer/modeling_decision_transformer.py
View file @
a2a3afbc
...
...
@@ -22,15 +22,12 @@ from typing import Optional, Tuple, Union
import
torch
import
torch.utils.checkpoint
from
torch
import
nn
from
torch.cuda.amp
import
autocast
from
...activations
import
ACT2FN
from
...modeling_outputs
import
BaseModelOutputWithPastAndCrossAttentions
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
Conv1D
,
find_pruneable_heads_and_indices
,
is_torch_greater_or_equal_than_1_6
,
prune_conv1d_layer
,
)
from
...pytorch_utils
import
Conv1D
,
find_pruneable_heads_and_indices
,
prune_conv1d_layer
from
...utils
import
(
ModelOutput
,
add_start_docstrings
,
...
...
@@ -38,15 +35,6 @@ from ...utils import (
logging
,
replace_return_docstrings
,
)
if
is_torch_greater_or_equal_than_1_6
:
is_amp_available
=
True
from
torch.cuda.amp
import
autocast
else
:
is_amp_available
=
False
from
...modeling_outputs
import
BaseModelOutputWithPastAndCrossAttentions
from
.configuration_decision_transformer
import
DecisionTransformerConfig
...
...
@@ -235,12 +223,7 @@ class DecisionTransformerGPT2Attention(nn.Module):
scale_factor
/=
float
(
self
.
layer_idx
+
1
)
# Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk))
if
is_amp_available
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
else
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
...
...
src/transformers/models/distilbert/modeling_distilbert.py
View file @
a2a3afbc
...
...
@@ -39,12 +39,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
add_code_sample_docstrings
,
add_start_docstrings
,
...
...
@@ -106,10 +101,9 @@ class Embeddings(nn.Module):
self
.
LayerNorm
=
nn
.
LayerNorm
(
config
.
dim
,
eps
=
1e-12
)
self
.
dropout
=
nn
.
Dropout
(
config
.
dropout
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)),
persistent
=
False
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)),
persistent
=
False
)
def
forward
(
self
,
input_ids
:
torch
.
Tensor
)
->
torch
.
Tensor
:
"""
...
...
src/transformers/models/electra/modeling_electra.py
View file @
a2a3afbc
...
...
@@ -36,12 +36,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
,
SequenceSummary
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -169,12 +164,9 @@ class ElectraEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
# Copied from transformers.models.bert.modeling_bert.BertEmbeddings.forward
def
forward
(
...
...
src/transformers/models/ernie/modeling_ernie.py
View file @
a2a3afbc
...
...
@@ -38,12 +38,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -96,12 +91,9 @@ class ErnieEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
...
...
src/transformers/models/flaubert/modeling_flaubert.py
View file @
a2a3afbc
...
...
@@ -22,7 +22,6 @@ import torch
from
torch
import
nn
from
...modeling_outputs
import
BaseModelOutput
from
...pytorch_utils
import
is_torch_greater_than_1_6
from
...utils
import
add_code_sample_docstrings
,
add_start_docstrings
,
add_start_docstrings_to_model_forward
,
logging
from
..xlm.modeling_xlm
import
(
XLMForMultipleChoice
,
...
...
@@ -139,10 +138,9 @@ class FlaubertModel(XLMModel):
super
().
__init__
(
config
)
self
.
layerdrop
=
getattr
(
config
,
"layerdrop"
,
0.0
)
self
.
pre_norm
=
getattr
(
config
,
"pre_norm"
,
False
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)),
persistent
=
False
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)),
persistent
=
False
)
@
add_start_docstrings_to_model_forward
(
FLAUBERT_INPUTS_DOCSTRING
)
@
add_code_sample_docstrings
(
...
...
src/transformers/models/flava/modeling_flava.py
View file @
a2a3afbc
...
...
@@ -29,7 +29,6 @@ from transformers.utils.doc import add_code_sample_docstrings
from
...activations
import
ACT2FN
from
...modeling_outputs
import
BaseModelOutput
,
BaseModelOutputWithPooling
from
...modeling_utils
import
PreTrainedModel
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...pytorch_utils
import
is_torch_greater_than_1_6
from
...utils
import
(
ModelOutput
,
add_start_docstrings
,
...
...
@@ -392,12 +391,9 @@ class FlavaTextEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
...
...
src/transformers/models/fnet/modeling_fnet.py
View file @
a2a3afbc
...
...
@@ -43,7 +43,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
apply_chunking_to_forward
,
is_torch_greater_than_1_6
from
...pytorch_utils
import
apply_chunking_to_forward
from
...utils
import
(
add_code_sample_docstrings
,
add_start_docstrings
,
...
...
@@ -117,12 +117,9 @@ class FNetEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
input_ids
=
None
,
token_type_ids
=
None
,
position_ids
=
None
,
inputs_embeds
=
None
):
if
input_ids
is
not
None
:
...
...
src/transformers/models/gpt2/modeling_gpt2.py
View file @
a2a3afbc
...
...
@@ -23,22 +23,9 @@ from typing import Optional, Tuple, Union
import
torch
import
torch.utils.checkpoint
from
torch
import
nn
from
torch.cuda.amp
import
autocast
from
torch.nn
import
BCEWithLogitsLoss
,
CrossEntropyLoss
,
MSELoss
from
...pytorch_utils
import
(
Conv1D
,
find_pruneable_heads_and_indices
,
is_torch_greater_or_equal_than_1_6
,
prune_conv1d_layer
,
)
if
is_torch_greater_or_equal_than_1_6
:
is_amp_available
=
True
from
torch.cuda.amp
import
autocast
else
:
is_amp_available
=
False
from
...activations
import
ACT2FN
from
...modeling_outputs
import
(
BaseModelOutputWithPastAndCrossAttentions
,
...
...
@@ -47,6 +34,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
,
SequenceSummary
from
...pytorch_utils
import
Conv1D
,
find_pruneable_heads_and_indices
,
prune_conv1d_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -247,12 +235,7 @@ class GPT2Attention(nn.Module):
scale_factor
/=
float
(
self
.
layer_idx
+
1
)
# Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk))
if
is_amp_available
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
else
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
...
...
src/transformers/models/imagegpt/modeling_imagegpt.py
View file @
a2a3afbc
...
...
@@ -22,22 +22,9 @@ from typing import Any, Optional, Tuple, Union
import
torch
import
torch.utils.checkpoint
from
torch
import
nn
from
torch.cuda.amp
import
autocast
from
torch.nn
import
BCEWithLogitsLoss
,
CrossEntropyLoss
,
MSELoss
from
...pytorch_utils
import
(
Conv1D
,
find_pruneable_heads_and_indices
,
is_torch_greater_or_equal_than_1_6
,
prune_conv1d_layer
,
)
if
is_torch_greater_or_equal_than_1_6
:
is_amp_available
=
True
from
torch.cuda.amp
import
autocast
else
:
is_amp_available
=
False
from
...activations
import
ACT2FN
from
...modeling_outputs
import
(
BaseModelOutputWithPastAndCrossAttentions
,
...
...
@@ -45,6 +32,7 @@ from ...modeling_outputs import (
SequenceClassifierOutputWithPast
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
Conv1D
,
find_pruneable_heads_and_indices
,
prune_conv1d_layer
from
...utils
import
add_start_docstrings
,
add_start_docstrings_to_model_forward
,
logging
,
replace_return_docstrings
from
.configuration_imagegpt
import
ImageGPTConfig
...
...
@@ -299,12 +287,7 @@ class ImageGPTAttention(nn.Module):
scale_factor
/=
float
(
self
.
layer_idx
+
1
)
# Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk))
if
is_amp_available
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
else
:
with
autocast
(
enabled
=
False
):
q
,
k
=
query
.
reshape
(
-
1
,
q_seq_len
,
dk
),
key
.
transpose
(
-
1
,
-
2
).
reshape
(
-
1
,
dk
,
k_seq_len
)
attn_weights
=
torch
.
baddbmm
(
attn_weights
,
q
.
float
(),
k
.
float
(),
beta
=
0
,
alpha
=
scale_factor
)
attn_weights
=
attn_weights
.
reshape
(
bsz
,
num_heads
,
q_seq_len
,
k_seq_len
)
...
...
src/transformers/models/mctct/modeling_mctct.py
View file @
a2a3afbc
...
...
@@ -33,7 +33,6 @@ from ...modeling_utils import (
find_pruneable_heads_and_indices
,
prune_linear_layer
,
)
from
...pytorch_utils
import
is_torch_greater_than_1_6
from
...utils
import
logging
from
.configuration_mctct
import
MCTCTConfig
...
...
@@ -153,12 +152,11 @@ class MCTCTEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
)))
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
,
device
=
self
.
position_ids
.
device
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
,
device
=
self
.
position_ids
.
device
),
persistent
=
False
,
)
def
forward
(
self
,
input_features
=
None
,
token_type_ids
=
None
,
position_ids
=
None
,
inputs_embeds
=
None
,
past_key_values_length
=
0
...
...
src/transformers/models/nezha/modeling_nezha.py
View file @
a2a3afbc
...
...
@@ -38,12 +38,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
(
ModelOutput
,
add_code_sample_docstrings
,
...
...
@@ -187,12 +182,9 @@ class NezhaEmbeddings(nn.Module):
# any TensorFlow checkpoint file
self
.
LayerNorm
=
nn
.
LayerNorm
(
config
.
hidden_size
,
eps
=
config
.
layer_norm_eps
)
self
.
dropout
=
nn
.
Dropout
(
config
.
hidden_dropout_prob
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
((
1
,
config
.
max_position_embeddings
),
dtype
=
torch
.
long
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
((
1
,
config
.
max_position_embeddings
),
dtype
=
torch
.
long
),
persistent
=
False
)
def
forward
(
self
,
...
...
src/transformers/models/nystromformer/modeling_nystromformer.py
View file @
a2a3afbc
...
...
@@ -33,12 +33,7 @@ from ...modeling_outputs import (
TokenClassifierOutput
,
)
from
...modeling_utils
import
PreTrainedModel
from
...pytorch_utils
import
(
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
is_torch_greater_than_1_6
,
prune_linear_layer
,
)
from
...pytorch_utils
import
apply_chunking_to_forward
,
find_pruneable_heads_and_indices
,
prune_linear_layer
from
...utils
import
add_code_sample_docstrings
,
add_start_docstrings
,
add_start_docstrings_to_model_forward
,
logging
from
.configuration_nystromformer
import
NystromformerConfig
...
...
@@ -72,12 +67,11 @@ class NystromformerEmbeddings(nn.Module):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self
.
register_buffer
(
"position_ids"
,
torch
.
arange
(
config
.
max_position_embeddings
).
expand
((
1
,
-
1
))
+
2
)
self
.
position_embedding_type
=
getattr
(
config
,
"position_embedding_type"
,
"absolute"
)
if
is_torch_greater_than_1_6
:
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
,
device
=
self
.
position_ids
.
device
),
persistent
=
False
,
)
self
.
register_buffer
(
"token_type_ids"
,
torch
.
zeros
(
self
.
position_ids
.
size
(),
dtype
=
torch
.
long
,
device
=
self
.
position_ids
.
device
),
persistent
=
False
,
)
def
forward
(
self
,
input_ids
=
None
,
token_type_ids
=
None
,
position_ids
=
None
,
inputs_embeds
=
None
):
if
input_ids
is
not
None
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment