chenpangpang / transformers
Unverified commit 32dbb2d9, authored Apr 26, 2021 by Patrick von Platen, committed by GitHub on Apr 26, 2021

make style (#11442)

parent 04ab2ca6
Showing 20 of 105 changed files, with 33 additions and 33 deletions on this page.
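Every hunk on this page is the same one-line change: the padding spaces just inside the triple quotes of a one-line docstring are stripped, e.g. """ Initialize the weights """ becomes """Initialize the weights""". This appears to be black's docstring normalization (introduced around black 21.4b0), applied via the repository's make style target, which at the time ran black and isort over the source tree. A minimal sketch of the visible transformation; normalize_docstring_line is an illustrative helper, not black's actual implementation:

import re

def normalize_docstring_line(line: str) -> str:
    # Strip the padding spaces just inside the quotes of a one-line docstring.
    # Illustrates the visible effect of this commit, not black's real code.
    return re.sub(r'"""\s*(.*?)\s*"""', r'"""\1"""', line)

print(normalize_docstring_line('    """ Initialize the weights """'))
# -> '    """Initialize the weights"""'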
src/transformers/models/albert/modeling_albert.py (+1, -1)
src/transformers/models/albert/modeling_tf_albert.py (+1, -1)
src/transformers/models/albert/tokenization_albert.py (+2, -2)
src/transformers/models/barthez/tokenization_barthez.py (+1, -1)
src/transformers/models/bert/modeling_bert.py (+1, -1)
src/transformers/models/bert/tokenization_bert.py (+2, -2)
src/transformers/models/bert_generation/modeling_bert_generation.py (+1, -1)
src/transformers/models/bert_generation/tokenization_bert_generation.py (+2, -2)
src/transformers/models/bertweet/tokenization_bertweet.py (+2, -2)
src/transformers/models/big_bird/modeling_big_bird.py (+3, -3)
src/transformers/models/big_bird/tokenization_big_bird.py (+2, -2)
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py (+3, -3)
src/transformers/models/camembert/tokenization_camembert.py (+1, -1)
src/transformers/models/convbert/modeling_convbert.py (+1, -1)
src/transformers/models/ctrl/tokenization_ctrl.py (+2, -2)
src/transformers/models/deberta_v2/tokenization_deberta_v2.py (+2, -2)
src/transformers/models/deit/modeling_deit.py (+1, -1)
src/transformers/models/distilbert/modeling_distilbert.py (+2, -2)
src/transformers/models/distilbert/modeling_tf_distilbert.py (+2, -2)
src/transformers/models/electra/modeling_electra.py (+1, -1)
src/transformers/models/albert/modeling_albert.py

@@ -71,7 +71,7 @@ ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
-    """ Load tf checkpoints in a pytorch model."""
+    """Load tf checkpoints in a pytorch model."""
     try:
         import re
src/transformers/models/albert/modeling_tf_albert.py

@@ -189,7 +189,7 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
 class TFAlbertAttention(tf.keras.layers.Layer):
-    """ Contains the complete attention sublayer, including both dropouts and layer norm. """
+    """Contains the complete attention sublayer, including both dropouts and layer norm."""

     def __init__(self, config: AlbertConfig, **kwargs):
         super().__init__(**kwargs)
src/transformers/models/albert/tokenization_albert.py

@@ -187,7 +187,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         return outputs

     def _tokenize(self, text, sample=False):
-        """ Tokenize a string. """
+        """Tokenize a string."""
         text = self.preprocess_text(text)
         if not sample:
@@ -211,7 +211,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         return new_pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.PieceToId(token)

     def _convert_id_to_token(self, index):
src/transformers/models/barthez/tokenization_barthez.py

@@ -223,7 +223,7 @@ class BarthezTokenizer(PreTrainedTokenizer):
         return self.sp_model.EncodeAsPieces(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         if token in self.fairseq_tokens_to_ids:
             return self.fairseq_tokens_to_ids[token]
         spm_id = self.sp_model.PieceToId(token)
src/transformers/models/bert/modeling_bert.py

@@ -703,7 +703,7 @@ class BertPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
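The _init_weights hook touched above is where BERT-family models draw their initial weights; the in-code comment notes that PyTorch uses a plain normal distribution where the TF original used truncated_normal. A minimal sketch of the usual body, assuming a config with initializer_range (typically 0.02); init_weights_sketch is an illustrative name, not the library's API:

import torch.nn as nn

def init_weights_sketch(module: nn.Module, initializer_range: float = 0.02) -> None:
    # Sketch of the BertPreTrainedModel._init_weights pattern: plain normal
    # init (the TF original used truncated_normal, cf. pytorch/pytorch#5617).
    if isinstance(module, nn.Linear):
        module.weight.data.normal_(mean=0.0, std=initializer_range)
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.LayerNorm):
        module.bias.data.zero_()
        module.weight.data.fill_(1.0)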
src/transformers/models/bert/tokenization_bert.py

@@ -233,7 +233,7 @@ class BertTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.vocab.get(token, self.vocab.get(self.unk_token))

     def _convert_id_to_token(self, index):
@@ -241,7 +241,7 @@ class BertTokenizer(PreTrainedTokenizer):
         return self.ids_to_tokens.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace(" ##", "").strip()
         return out_string
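For reference, the " ##" replacement in convert_tokens_to_string above undoes WordPiece's continuation prefix, so subword pieces rejoin into words. A small worked example (the tokens are hypothetical WordPiece pieces):

# WordPiece marks word-internal pieces with a leading "##".
tokens = ["un", "##aff", "##able", "fact"]
out_string = " ".join(tokens).replace(" ##", "").strip()
print(out_string)  # "unaffable fact"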
src/transformers/models/bert_generation/modeling_bert_generation.py

@@ -177,7 +177,7 @@ class BertGenerationPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/bert_generation/tokenization_bert_generation.py

@@ -119,7 +119,7 @@ class BertGenerationTokenizer(PreTrainedTokenizer):
         return pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.piece_to_id(token)

     def _convert_id_to_token(self, index):
@@ -128,7 +128,7 @@ class BertGenerationTokenizer(PreTrainedTokenizer):
         return token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = self.sp_model.decode_pieces(tokens)
         return out_string
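The piece_to_id / decode_pieces calls above are raw SentencePiece lookups. A minimal round-trip sketch, assuming a trained SentencePiece model file spiece.model is available locally:

import sentencepiece as spm

sp_model = spm.SentencePieceProcessor()
sp_model.Load("spiece.model")  # assumed local model file

pieces = sp_model.EncodeAsPieces("Hello world")           # subword pieces, e.g. ["▁Hello", "▁world"]
ids = [sp_model.piece_to_id(piece) for piece in pieces]   # vocab ids
print(sp_model.decode_pieces(pieces))                     # "Hello world"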
src/transformers/models/bertweet/tokenization_bertweet.py

@@ -368,7 +368,7 @@ class BertweetTokenizer(PreTrainedTokenizer):
         return token

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.encoder.get(token, self.encoder.get(self.unk_token))

     def _convert_id_to_token(self, index):
@@ -376,7 +376,7 @@ class BertweetTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
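Here the "@@ " being stripped is a BPE continuation marker (fastBPE-style): a piece that continues into the next token carries a trailing "@@". A small worked example with hypothetical BPE pieces:

# "@@" marks a BPE piece that continues into the next token.
tokens = ["this", "tok@@", "en@@", "ized", "text"]
print(" ".join(tokens).replace("@@ ", "").strip())  # "this tokenized text"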
src/transformers/models/big_bird/modeling_big_bird.py

@@ -484,7 +484,7 @@ class BigBirdBlockSparseAttention(nn.Module):
     @staticmethod
     def torch_bmm_nd(inp_1, inp_2, ndim=None):
-        """ Fast nd matrix multiplication """
+        """Fast nd matrix multiplication"""
         # faster replacement of torch.einsum ("bhqk,bhkd->bhqd")
         return torch.bmm(inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:])).view(
             inp_1.shape[: ndim - 2] + (inp_1.shape[ndim - 2], inp_2.shape[ndim - 1])
         )
@@ -492,7 +492,7 @@ class BigBirdBlockSparseAttention(nn.Module):
     @staticmethod
     def torch_bmm_nd_transpose(inp_1, inp_2, ndim=None):
-        """ Fast nd matrix multiplication with transpose """
+        """Fast nd matrix multiplication with transpose"""
         # faster replacement of torch.einsum (bhqd,bhkd->bhqk)
         return torch.bmm(
             inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:]).transpose(1, 2)
@@ -1743,7 +1743,7 @@ class BigBirdPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
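torch_bmm_nd, reassembled above, flattens the batch and head dims, runs a single torch.bmm, then restores the leading shape. Its equivalence to the einsum named in the comment can be sanity-checked with a self-contained sketch:

import torch

def torch_bmm_nd(inp_1, inp_2, ndim=None):
    # Flatten leading dims, batch-matmul, then restore the leading shape.
    return torch.bmm(inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:])).view(
        inp_1.shape[: ndim - 2] + (inp_1.shape[ndim - 2], inp_2.shape[ndim - 1])
    )

a = torch.randn(2, 4, 8, 16)   # (b, h, q, k)
b = torch.randn(2, 4, 16, 32)  # (b, h, k, d)
assert torch.allclose(torch_bmm_nd(a, b, ndim=4), torch.einsum("bhqk,bhkd->bhqd", a, b), atol=1e-5)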
src/transformers/models/big_bird/tokenization_big_bird.py

@@ -149,7 +149,7 @@ class BigBirdTokenizer(PreTrainedTokenizer):
         return pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.piece_to_id(token)

     def _convert_id_to_token(self, index):
@@ -158,7 +158,7 @@ class BigBirdTokenizer(PreTrainedTokenizer):
         return token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = self.sp_model.decode_pieces(tokens)
         return out_string
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py

@@ -183,7 +183,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return " ".join(words)

     def _tokenize(self, text: str) -> List[str]:
-        """ Split a string into tokens using BPE."""
+        """Split a string into tokens using BPE."""
         split_tokens = []
         words = re.findall(r"\S+\n?", text)
@@ -193,7 +193,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token: str) -> int:
-        """ Converts a token to an id using the vocab. """
+        """Converts a token to an id using the vocab."""
         token = token.lower()
         return self.encoder.get(token, self.encoder.get(self.unk_token))
@@ -202,7 +202,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens: List[str]) -> str:
-        """ Converts a sequence of tokens in a single string. """
+        """Converts a sequence of tokens in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
src/transformers/models/camembert/tokenization_camembert.py

@@ -222,7 +222,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
         return self.sp_model.EncodeAsPieces(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         if token in self.fairseq_tokens_to_ids:
             return self.fairseq_tokens_to_ids[token]
         elif self.sp_model.PieceToId(token) == 0:
src/transformers/models/convbert/modeling_convbert.py

@@ -238,7 +238,7 @@ class ConvBertPreTrainedModel(PreTrainedModel):
     authorized_unexpected_keys = [r"convbert\.embeddings_project\.weight", r"convbert\.embeddings_project\.bias"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/ctrl/tokenization_ctrl.py

@@ -212,7 +212,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.encoder.get(token, self.encoder.get(self.unk_token))

     def _convert_id_to_token(self, index):
@@ -220,7 +220,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
src/transformers/models/deberta_v2/tokenization_deberta_v2.py

@@ -134,7 +134,7 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
         return self._tokenizer.tokenize(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self._tokenizer.spm.PieceToId(token)

     def _convert_id_to_token(self, index):
@@ -142,7 +142,7 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
         return self._tokenizer.spm.IdToPiece(index) if index < self.vocab_size else self.unk_token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         return self._tokenizer.decode(tokens)

     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
src/transformers/models/deit/modeling_deit.py

@@ -386,7 +386,7 @@ class DeiTPreTrainedModel(PreTrainedModel):
     base_model_prefix = "deit"

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, (nn.Linear, nn.Conv2d)):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/distilbert/modeling_distilbert.py

@@ -167,11 +167,11 @@ class MultiHeadSelfAttention(nn.Module):
         mask_reshp = (bs, 1, 1, k_length)

         def shape(x):
-            """ separate heads """
+            """separate heads"""
             return x.view(bs, -1, self.n_heads, dim_per_head).transpose(1, 2)

         def unshape(x):
-            """ group heads """
+            """group heads"""
             return x.transpose(1, 2).contiguous().view(bs, -1, self.n_heads * dim_per_head)

         q = shape(self.q_lin(query))  # (bs, n_heads, q_length, dim_per_head)
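shape and unshape above are closures over the module's n_heads and the local bs and dim_per_head; they move activations between (bs, seq, dim) and (bs, n_heads, seq, dim_per_head). A self-contained round-trip check, with the closure state replaced by local variables:

import torch

bs, seq_len, n_heads, dim_per_head = 2, 5, 12, 64

def shape(x):
    """separate heads"""
    return x.view(bs, -1, n_heads, dim_per_head).transpose(1, 2)

def unshape(x):
    """group heads"""
    return x.transpose(1, 2).contiguous().view(bs, -1, n_heads * dim_per_head)

x = torch.randn(bs, seq_len, n_heads * dim_per_head)
assert shape(x).shape == (bs, n_heads, seq_len, dim_per_head)
assert torch.equal(unshape(shape(x)), x)  # lossless round trip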
src/transformers/models/distilbert/modeling_tf_distilbert.py

@@ -175,11 +175,11 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
         mask_reshape = [bs, 1, 1, k_length]

         def shape(x):
-            """ separate heads """
+            """separate heads"""
             return tf.transpose(tf.reshape(x, (bs, -1, self.n_heads, dim_per_head)), perm=(0, 2, 1, 3))

         def unshape(x):
-            """ group heads """
+            """group heads"""
             return tf.reshape(tf.transpose(x, perm=(0, 2, 1, 3)), (bs, -1, self.n_heads * dim_per_head))

         q = shape(self.q_lin(query))  # (bs, n_heads, q_length, dim_per_head)
src/transformers/models/electra/modeling_electra.py

@@ -653,7 +653,7 @@ class ElectraPreTrainedModel(PreTrainedModel):
     # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights
     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617