chenpangpang / transformers, commit 7732d0fe (unverified)
Authored Feb 09, 2022 by Lysandre Debut; committed by GitHub on Feb 09, 2022
Parent: d923f762

Upgrade black to version ~=22.0 (#15565)

* Upgrade black to version ~=22.0
* Check copies
* Fix code
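Almost every hunk in this commit comes from a single rule in black's 2022 stable style: whitespace around the power operator is removed when both operands are simple (names, attribute access, or numeric literals). A minimal sketch of the before/after, using illustrative variable names rather than code taken from the diff:

    import math

    head_dim = 64

    # black < 22 formatted ** with spaces, like any other binary operator
    scaling = head_dim ** -0.5
    utf_vocab_size = 2 ** 8

    # black ~=22.0 drops the spaces when both operands are simple
    scaling = head_dim**-0.5
    utf_vocab_size = 2**8

    # spaces are kept when an operand is not simple, e.g. a function call
    norm = math.sqrt(head_dim) ** 2

The one exception below is the longformer hunk, where black ~=22.0 reflows a parenthesized product of two tf.cast calls rather than touching a power operator.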
Changes: 91 files changed in this commit, spread over 5 pages of the diff view. This page shows 20 changed files with 30 additions and 33 deletions (+30 / -33).
Files shown on this page:

src/transformers/models/led/modeling_led.py  +1 -1
src/transformers/models/led/modeling_tf_led.py  +1 -1
src/transformers/models/longformer/modeling_tf_longformer.py  +4 -7
src/transformers/models/m2m_100/modeling_m2m_100.py  +1 -1
src/transformers/models/marian/modeling_marian.py  +1 -1
src/transformers/models/marian/modeling_tf_marian.py  +1 -1
src/transformers/models/mbart/modeling_mbart.py  +1 -1
src/transformers/models/mbart/modeling_tf_mbart.py  +1 -1
src/transformers/models/pegasus/modeling_pegasus.py  +1 -1
src/transformers/models/pegasus/modeling_tf_pegasus.py  +1 -1
src/transformers/models/perceiver/modeling_perceiver.py  +5 -5
src/transformers/models/perceiver/tokenization_perceiver.py  +1 -1
src/transformers/models/prophetnet/modeling_prophetnet.py  +2 -2
src/transformers/models/reformer/modeling_reformer.py  +2 -2
src/transformers/models/roberta/tokenization_roberta.py  +2 -2
src/transformers/models/sew/modeling_sew.py  +1 -1
src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py  +1 -1
src/transformers/models/speech_to_text/modeling_speech_to_text.py  +1 -1
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py  +1 -1
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py  +1 -1
src/transformers/models/led/modeling_led.py

@@ -766,7 +766,7 @@ class LEDDecoderAttention(nn.Module):
         assert (
             self.head_dim * num_heads == self.embed_dim
         ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/led/modeling_tf_led.py

@@ -998,7 +998,7 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer):
         self.dropout = tf.keras.layers.Dropout(dropout)
         self.head_dim = embed_dim // num_heads
         assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/longformer/modeling_tf_longformer.py

@@ -405,13 +405,10 @@ def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_se
     else:
         # last token is separation token and should not be counted and in the middle are two separation tokens
         question_end_index = tf.tile(question_end_index + 1, (1, input_ids_shape[1]))
-        attention_mask = (
-            tf.cast(
-                attention_mask > question_end_index,
-                dtype=question_end_index.dtype,
-            )
-            * tf.cast(attention_mask < input_ids_shape[-1], dtype=question_end_index.dtype)
-        )
+        attention_mask = tf.cast(
+            attention_mask > question_end_index,
+            dtype=question_end_index.dtype,
+        ) * tf.cast(attention_mask < input_ids_shape[-1], dtype=question_end_index.dtype)

     return attention_mask
src/transformers/models/m2m_100/modeling_m2m_100.py

@@ -217,7 +217,7 @@ class M2M100Attention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/marian/modeling_marian.py

@@ -163,7 +163,7 @@ class MarianAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/marian/modeling_tf_marian.py

@@ -194,7 +194,7 @@ class TFMarianAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/mbart/modeling_mbart.py

@@ -152,7 +152,7 @@ class MBartAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/mbart/modeling_tf_mbart.py

@@ -154,7 +154,7 @@ class TFMBartAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/pegasus/modeling_pegasus.py

@@ -163,7 +163,7 @@ class PegasusAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/pegasus/modeling_tf_pegasus.py

@@ -195,7 +195,7 @@ class TFPegasusAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/perceiver/modeling_perceiver.py

@@ -824,7 +824,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
         ...     project_pos_dim=256,
         ...     trainable_position_encoding_kwargs=dict(
         ...         num_channels=256,
-        ...         index_dims=config.image_size ** 2,
+        ...         index_dims=config.image_size**2,
         ...     ),
         ... )

@@ -1205,7 +1205,7 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)

-        trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size ** 2)
+        trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size**2)
         trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)

         self.num_labels = config.num_labels

@@ -2485,7 +2485,7 @@ def space_to_depth(frames: torch.Tensor, temporal_block_size: int = 1, spatial_b
             batch_size,
             height // spatial_block_size,
             width // spatial_block_size,
-            (spatial_block_size ** 2) * num_channels,
+            (spatial_block_size**2) * num_channels,
         )
         return frames
     elif len(frames.shape) == 5:

@@ -2509,7 +2509,7 @@ def space_to_depth(frames: torch.Tensor, temporal_block_size: int = 1, spatial_b
             time // temporal_block_size,
             height // spatial_block_size,
             width // spatial_block_size,
-            temporal_block_size * (spatial_block_size ** 2) * num_channels,
+            temporal_block_size * (spatial_block_size**2) * num_channels,
         )
         return frames
     else:

@@ -3059,7 +3059,7 @@ class PerceiverImagePreprocessor(AbstractPreprocessor):
         if self.conv_after_patching:
             inp_dim = self.out_channels
         else:
-            inp_dim = self.in_channels * self.spatial_downsample ** 2
+            inp_dim = self.in_channels * self.spatial_downsample**2
             if is_temporal:
                 inp_dim *= self.temporal_downsample
src/transformers/models/perceiver/tokenization_perceiver.py

@@ -87,7 +87,7 @@ class PerceiverTokenizer(PreTrainedTokenizer):
             **kwargs,
         )

-        self._utf_vocab_size = 2 ** 8  # utf is 8 bits
+        self._utf_vocab_size = 2**8  # utf is 8 bits

         # define special tokens dict
         self.special_tokens_encoder: Dict[str, int] = {
src/transformers/models/prophetnet/modeling_prophetnet.py

@@ -674,7 +674,7 @@ class ProphetNetAttention(nn.Module):
         ], f"Size of hidden states should be {batch_size, tgt_len, hidden_size}, but is {hidden_states.size()}"

         # previous time steps are cached - no need to recompute key and value if they are static
-        query_states = self.query_proj(hidden_states) / (self.head_dim ** 0.5)
+        query_states = self.query_proj(hidden_states) / (self.head_dim**0.5)

         if is_cross_attention and past_key_value is not None:
             # reuse k,v, cross_attentions

@@ -855,7 +855,7 @@ class ProphetNetNgramSelfAttention(nn.Module):
         value_states = self.value_proj(hidden_states)

         # normalize
-        query_states = query_states / (self.head_dim ** 0.5)
+        query_states = query_states / (self.head_dim**0.5)

         # reshape
         query_states = self._shape(query_states, ngram_sequence_length, batch_size)
src/transformers/models/reformer/modeling_reformer.py

@@ -700,7 +700,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
         # `num_buckets` should be set to 2 * sequence_length // chunk_length as recommended in paper
         num_buckets_pow_2 = (2 * (sequence_length // self.chunk_length)).bit_length() - 1
         # make sure buckets are power of 2
-        num_buckets = 2 ** num_buckets_pow_2
+        num_buckets = 2**num_buckets_pow_2

         # factorize `num_buckets` if `num_buckets` becomes too large
         num_buckets_limit = 2 * max(

@@ -966,7 +966,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
         """
         length normalization
         """
-        variance = torch.mean(x ** 2, -1, keepdim=True)
+        variance = torch.mean(x**2, -1, keepdim=True)
         norm_x = x * torch.rsqrt(variance + epsilon)
         return norm_x
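For context on the first reformer hunk above: the `bit_length` expression rounds the recommended bucket count, 2 * sequence_length // chunk_length, down to the nearest power of two. A quick worked check with hypothetical sizes, not values taken from this diff:

    sequence_length, chunk_length = 4096, 64

    # 2 * (4096 // 64) = 128, and 128.bit_length() - 1 = 7
    num_buckets_pow_2 = (2 * (sequence_length // chunk_length)).bit_length() - 1
    num_buckets = 2**num_buckets_pow_2

    print(num_buckets_pow_2, num_buckets)  # 7 128

    # with chunk_length = 48, the recommended value 2 * (4096 // 48) = 170 is not a
    # power of two; the same expression rounds it down to 2**7 = 128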
src/transformers/models/roberta/tokenization_roberta.py

@@ -77,10 +77,10 @@ def bytes_to_unicode():
     )
     cs = bs[:]
     n = 0
-    for b in range(2 ** 8):
+    for b in range(2**8):
         if b not in bs:
             bs.append(b)
-            cs.append(2 ** 8 + n)
+            cs.append(2**8 + n)
             n += 1
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
src/transformers/models/sew/modeling_sew.py

@@ -420,7 +420,7 @@ class SEWAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py

@@ -86,7 +86,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
         Get mel-filter bank features using TorchAudio. Note that TorchAudio requires 16-bit signed integers as inputs
         and hence the waveform should not be normalized before feature extraction.
         """
-        waveform = waveform * (2 ** 15)  # Kaldi compliance: 16-bit signed integers
+        waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integers
         waveform = torch.from_numpy(waveform).unsqueeze(0)
         features = ta_kaldi.fbank(waveform, num_mel_bins=self.num_mel_bins, sample_frequency=self.sampling_rate)
         return features.numpy()
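For context on the hunk above: the 2**15 factor rescales a float waveform in [-1, 1] to the 16-bit signed-integer range that Kaldi-compatible fbank extraction expects. A standalone sketch of the same steps, assuming torchaudio is installed and using synthetic audio rather than anything from this commit:

    import numpy as np
    import torch
    import torchaudio.compliance.kaldi as ta_kaldi

    # one second of synthetic mono audio at 16 kHz, normalized to [-1, 1]
    waveform = np.random.uniform(-1.0, 1.0, size=16000).astype(np.float32)

    waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integer range
    waveform = torch.from_numpy(waveform).unsqueeze(0)
    features = ta_kaldi.fbank(waveform, num_mel_bins=80, sample_frequency=16000)

    print(features.shape)  # roughly (98, 80): frames x mel bins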
src/transformers/models/speech_to_text/modeling_speech_to_text.py

@@ -230,7 +230,7 @@ class Speech2TextAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py

@@ -256,7 +256,7 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py

@@ -170,7 +170,7 @@ class Speech2Text2Attention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
            )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)