Unverified commit 32dbb2d9, authored Apr 26, 2021 by Patrick von Platen; committed via GitHub on Apr 26, 2021.

make style (#11442)

Parent: 04ab2ca6
Changes: 105 files in the full commit, split across 6 pages. This page shows 20 changed files with 33 additions and 33 deletions (+33 -33).
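Every hunk on this page is the same mechanical fix: the old docstrings carried padding spaces just inside the triple quotes, and the restyle strips them so the text sits flush against the quotes. A minimal before/after sketch of the pattern (the method name is taken from the hunks below, purely for illustration):

    # before "make style": padded docstring
    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """

    # after: text flush against the triple quotes
    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""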
src/transformers/models/albert/modeling_albert.py  +1 -1
src/transformers/models/albert/modeling_tf_albert.py  +1 -1
src/transformers/models/albert/tokenization_albert.py  +2 -2
src/transformers/models/barthez/tokenization_barthez.py  +1 -1
src/transformers/models/bert/modeling_bert.py  +1 -1
src/transformers/models/bert/tokenization_bert.py  +2 -2
src/transformers/models/bert_generation/modeling_bert_generation.py  +1 -1
src/transformers/models/bert_generation/tokenization_bert_generation.py  +2 -2
src/transformers/models/bertweet/tokenization_bertweet.py  +2 -2
src/transformers/models/big_bird/modeling_big_bird.py  +3 -3
src/transformers/models/big_bird/tokenization_big_bird.py  +2 -2
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py  +3 -3
src/transformers/models/camembert/tokenization_camembert.py  +1 -1
src/transformers/models/convbert/modeling_convbert.py  +1 -1
src/transformers/models/ctrl/tokenization_ctrl.py  +2 -2
src/transformers/models/deberta_v2/tokenization_deberta_v2.py  +2 -2
src/transformers/models/deit/modeling_deit.py  +1 -1
src/transformers/models/distilbert/modeling_distilbert.py  +2 -2
src/transformers/models/distilbert/modeling_tf_distilbert.py  +2 -2
src/transformers/models/electra/modeling_electra.py  +1 -1
src/transformers/models/albert/modeling_albert.py

@@ -71,7 +71,7 @@ ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
-    """ Load tf checkpoints in a pytorch model."""
+    """Load tf checkpoints in a pytorch model."""
     try:
         import re
src/transformers/models/albert/modeling_tf_albert.py

@@ -189,7 +189,7 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
 class TFAlbertAttention(tf.keras.layers.Layer):
-    """ Contains the complete attention sublayer, including both dropouts and layer norm. """
+    """Contains the complete attention sublayer, including both dropouts and layer norm."""

     def __init__(self, config: AlbertConfig, **kwargs):
         super().__init__(**kwargs)
src/transformers/models/albert/tokenization_albert.py

@@ -187,7 +187,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         return outputs

     def _tokenize(self, text, sample=False):
-        """ Tokenize a string. """
+        """Tokenize a string."""
         text = self.preprocess_text(text)
         if not sample:

@@ -211,7 +211,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         return new_pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.PieceToId(token)

     def _convert_id_to_token(self, index):
src/transformers/models/barthez/tokenization_barthez.py

@@ -223,7 +223,7 @@ class BarthezTokenizer(PreTrainedTokenizer):
         return self.sp_model.EncodeAsPieces(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         if token in self.fairseq_tokens_to_ids:
             return self.fairseq_tokens_to_ids[token]
         spm_id = self.sp_model.PieceToId(token)
src/transformers/models/bert/modeling_bert.py

@@ -703,7 +703,7 @@ class BertPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
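The `_init_weights` hook touched here (and again in the bert_generation, big_bird, convbert, deit, and electra hunks below) is where these models draw their initial weights. The hunk cuts off after the comment, so the following is only a sketch of the standard continuation, assuming BERT's default `initializer_range` of 0.02 rather than quoting the file:

    import torch.nn as nn

    def _init_weights(module, initializer_range=0.02):
        # plain normal_ rather than TF's truncated_normal,
        # cf. https://github.com/pytorch/pytorch/pull/5617
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()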
src/transformers/models/bert/tokenization_bert.py

@@ -233,7 +233,7 @@ class BertTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.vocab.get(token, self.vocab.get(self.unk_token))

     def _convert_id_to_token(self, index):

@@ -241,7 +241,7 @@ class BertTokenizer(PreTrainedTokenizer):
         return self.ids_to_tokens.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace(" ##", "").strip()
         return out_string
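For context, the `convert_tokens_to_string` body in this hunk undoes WordPiece's `##` continuation markers when detokenizing; a self-contained check, with the example tokens assumed:

    tokens = ["un", "##aff", "##able"]
    out_string = " ".join(tokens).replace(" ##", "").strip()
    assert out_string == "unaffable"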
src/transformers/models/bert_generation/modeling_bert_generation.py

@@ -177,7 +177,7 @@ class BertGenerationPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/bert_generation/tokenization_bert_generation.py

@@ -119,7 +119,7 @@ class BertGenerationTokenizer(PreTrainedTokenizer):
         return pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.piece_to_id(token)

     def _convert_id_to_token(self, index):

@@ -128,7 +128,7 @@ class BertGenerationTokenizer(PreTrainedTokenizer):
         return token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = self.sp_model.decode_pieces(tokens)
         return out_string
src/transformers/models/bertweet/tokenization_bertweet.py

@@ -368,7 +368,7 @@ class BertweetTokenizer(PreTrainedTokenizer):
         return token

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.encoder.get(token, self.encoder.get(self.unk_token))

     def _convert_id_to_token(self, index):

@@ -376,7 +376,7 @@ class BertweetTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
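BERTweet's vocabulary uses fastBPE-style `@@` continuation suffixes (the same `@@ ` convention appears in the CTRL and BlenderbotSmall hunks below), so joining tokens and deleting `"@@ "` restores the original words; a quick check with assumed tokens:

    tokens = ["hel@@", "lo", "wor@@", "ld"]
    out_string = " ".join(tokens).replace("@@ ", "").strip()
    assert out_string == "hello world"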
src/transformers/models/big_bird/modeling_big_bird.py

@@ -484,7 +484,7 @@ class BigBirdBlockSparseAttention(nn.Module):
     @staticmethod
     def torch_bmm_nd(inp_1, inp_2, ndim=None):
-        """ Fast nd matrix multiplication """
+        """Fast nd matrix multiplication"""
         # faster replacement of torch.einsum ("bhqk,bhkd->bhqd")
         return torch.bmm(inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:])).view(
             inp_1.shape[: ndim - 2] + (inp_1.shape[ndim - 2], inp_2.shape[ndim - 1])

@@ -492,7 +492,7 @@ class BigBirdBlockSparseAttention(nn.Module):
     @staticmethod
     def torch_bmm_nd_transpose(inp_1, inp_2, ndim=None):
-        """ Fast nd matrix multiplication with transpose """
+        """Fast nd matrix multiplication with transpose"""
         # faster replacement of torch.einsum (bhqd,bhkd->bhqk)
         return torch.bmm(
             inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:]).transpose(1, 2)

@@ -1743,7 +1743,7 @@ class BigBirdPreTrainedModel(PreTrainedModel):
     _keys_to_ignore_on_load_missing = [r"position_ids"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
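Only the docstrings of the two `torch_bmm_nd` helpers change here, but since they sit at the core of BigBird's block-sparse attention, a shape check may help: this self-contained sketch reuses the function body from the first hunk and verifies it against the einsum it replaces (the tensor shapes are assumed for illustration):

    import torch

    def torch_bmm_nd(inp_1, inp_2, ndim=None):
        # flatten all leading batch dims, run one bmm, then restore the nd shape
        return torch.bmm(
            inp_1.reshape((-1,) + inp_1.shape[-2:]), inp_2.reshape((-1,) + inp_2.shape[-2:])
        ).view(inp_1.shape[: ndim - 2] + (inp_1.shape[ndim - 2], inp_2.shape[ndim - 1]))

    a = torch.randn(2, 4, 8, 16)  # e.g. attention probs: (bs, heads, q_len, k_len)
    b = torch.randn(2, 4, 16, 8)  # e.g. value layer: (bs, heads, k_len, head_dim)
    out = torch_bmm_nd(a, b, ndim=4)
    assert out.shape == (2, 4, 8, 8)
    assert torch.allclose(out, torch.einsum("bhqk,bhkd->bhqd", a, b), atol=1e-5)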
src/transformers/models/big_bird/tokenization_big_bird.py

@@ -149,7 +149,7 @@ class BigBirdTokenizer(PreTrainedTokenizer):
         return pieces

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.sp_model.piece_to_id(token)

     def _convert_id_to_token(self, index):

@@ -158,7 +158,7 @@ class BigBirdTokenizer(PreTrainedTokenizer):
         return token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = self.sp_model.decode_pieces(tokens)
         return out_string
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py

@@ -183,7 +183,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return " ".join(words)

     def _tokenize(self, text: str) -> List[str]:
-        """ Split a string into tokens using BPE."""
+        """Split a string into tokens using BPE."""
         split_tokens = []
         words = re.findall(r"\S+\n?", text)

@@ -193,7 +193,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token: str) -> int:
-        """ Converts a token to an id using the vocab. """
+        """Converts a token to an id using the vocab."""
         token = token.lower()
         return self.encoder.get(token, self.encoder.get(self.unk_token))

@@ -202,7 +202,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens: List[str]) -> str:
-        """ Converts a sequence of tokens in a single string. """
+        """Converts a sequence of tokens in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
src/transformers/models/camembert/tokenization_camembert.py

@@ -222,7 +222,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
         return self.sp_model.EncodeAsPieces(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         if token in self.fairseq_tokens_to_ids:
             return self.fairseq_tokens_to_ids[token]
         elif self.sp_model.PieceToId(token) == 0:
src/transformers/models/convbert/modeling_convbert.py

@@ -238,7 +238,7 @@ class ConvBertPreTrainedModel(PreTrainedModel):
     authorized_unexpected_keys = [r"convbert\.embeddings_project\.weight", r"convbert\.embeddings_project\.bias"]

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/ctrl/tokenization_ctrl.py

@@ -212,7 +212,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         return split_tokens

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self.encoder.get(token, self.encoder.get(self.unk_token))

     def _convert_id_to_token(self, index):

@@ -220,7 +220,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         return self.decoder.get(index, self.unk_token)

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         out_string = " ".join(tokens).replace("@@ ", "").strip()
         return out_string
src/transformers/models/deberta_v2/tokenization_deberta_v2.py

@@ -134,7 +134,7 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
         return self._tokenizer.tokenize(text)

     def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
         return self._tokenizer.spm.PieceToId(token)

     def _convert_id_to_token(self, index):

@@ -142,7 +142,7 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
         return self._tokenizer.spm.IdToPiece(index) if index < self.vocab_size else self.unk_token

     def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
+        """Converts a sequence of tokens (string) in a single string."""
         return self._tokenizer.decode(tokens)

     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
src/transformers/models/deit/modeling_deit.py

@@ -386,7 +386,7 @@ class DeiTPreTrainedModel(PreTrainedModel):
     base_model_prefix = "deit"

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, (nn.Linear, nn.Conv2d)):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/models/distilbert/modeling_distilbert.py

@@ -167,11 +167,11 @@ class MultiHeadSelfAttention(nn.Module):
         mask_reshp = (bs, 1, 1, k_length)

         def shape(x):
-            """ separate heads """
+            """separate heads"""
             return x.view(bs, -1, self.n_heads, dim_per_head).transpose(1, 2)

         def unshape(x):
-            """ group heads """
+            """group heads"""
             return x.transpose(1, 2).contiguous().view(bs, -1, self.n_heads * dim_per_head)

         q = shape(self.q_lin(query))  # (bs, n_heads, q_length, dim_per_head)
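`shape` and `unshape` are exact inverses: one splits the hidden dimension into attention heads, the other merges it back. A tiny round-trip check using the same expressions as the hunk (dimensions assumed, `self.n_heads` replaced by a local variable):

    import torch

    bs, n_heads, seq_len, dim_per_head = 2, 12, 5, 64
    x = torch.randn(bs, seq_len, n_heads * dim_per_head)

    # separate heads: (bs, seq, dim) -> (bs, n_heads, seq, dim_per_head)
    shaped = x.view(bs, -1, n_heads, dim_per_head).transpose(1, 2)
    assert shaped.shape == (bs, n_heads, seq_len, dim_per_head)

    # group heads: the inverse, back to (bs, seq, dim)
    unshaped = shaped.transpose(1, 2).contiguous().view(bs, -1, n_heads * dim_per_head)
    assert torch.equal(unshaped, x)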
src/transformers/models/distilbert/modeling_tf_distilbert.py

@@ -175,11 +175,11 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
         mask_reshape = [bs, 1, 1, k_length]

         def shape(x):
-            """ separate heads """
+            """separate heads"""
             return tf.transpose(tf.reshape(x, (bs, -1, self.n_heads, dim_per_head)), perm=(0, 2, 1, 3))

         def unshape(x):
-            """ group heads """
+            """group heads"""
             return tf.reshape(tf.transpose(x, perm=(0, 2, 1, 3)), (bs, -1, self.n_heads * dim_per_head))

         q = shape(self.q_lin(query))  # (bs, n_heads, q_length, dim_per_head)
src/transformers/models/electra/modeling_electra.py

@@ -653,7 +653,7 @@ class ElectraPreTrainedModel(PreTrainedModel):
     # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights
     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, nn.Linear):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617