chenpangpang / transformers · Commits

Commit 7732d0fe (unverified)
Authored Feb 09, 2022 by Lysandre Debut; committed by GitHub on Feb 09, 2022
Upgrade black to version ~=22.0 (#15565)
* Upgrade black to version ~=22.0
* Check copies
* Fix code
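Most of this diff is a mechanical consequence of black 22's new power-operator rule: spaces around `**` are removed when both operands are simple (names, attribute accesses, or numeric literals, optionally with a unary minus), and kept otherwise. A minimal before/after sketch of that rule follows; it is illustrative only, and the variable names are made up rather than taken from the diff.

head_dim = 64

# black < 22.0 kept spaces around the power operator:
scaling = head_dim ** -0.5
vocab_size = 2 ** 8

# black ~= 22.0 hugs ** when both operands are simple names, attributes, or literals:
scaling = head_dim**-0.5
vocab_size = 2**8

# spaces are kept when an operand is a more complex expression:
scaling = (head_dim + 1) ** -0.5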
Parent: d923f762
Changes: 91
Showing 20 changed files on this page, with 30 additions and 33 deletions
src/transformers/models/led/modeling_led.py (+1, -1)
src/transformers/models/led/modeling_tf_led.py (+1, -1)
src/transformers/models/longformer/modeling_tf_longformer.py (+4, -7)
src/transformers/models/m2m_100/modeling_m2m_100.py (+1, -1)
src/transformers/models/marian/modeling_marian.py (+1, -1)
src/transformers/models/marian/modeling_tf_marian.py (+1, -1)
src/transformers/models/mbart/modeling_mbart.py (+1, -1)
src/transformers/models/mbart/modeling_tf_mbart.py (+1, -1)
src/transformers/models/pegasus/modeling_pegasus.py (+1, -1)
src/transformers/models/pegasus/modeling_tf_pegasus.py (+1, -1)
src/transformers/models/perceiver/modeling_perceiver.py (+5, -5)
src/transformers/models/perceiver/tokenization_perceiver.py (+1, -1)
src/transformers/models/prophetnet/modeling_prophetnet.py (+2, -2)
src/transformers/models/reformer/modeling_reformer.py (+2, -2)
src/transformers/models/roberta/tokenization_roberta.py (+2, -2)
src/transformers/models/sew/modeling_sew.py (+1, -1)
src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py (+1, -1)
src/transformers/models/speech_to_text/modeling_speech_to_text.py (+1, -1)
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py (+1, -1)
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py (+1, -1)
src/transformers/models/led/modeling_led.py
@@ -766,7 +766,7 @@ class LEDDecoderAttention(nn.Module):
         assert (
             self.head_dim * num_heads == self.embed_dim
         ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/led/modeling_tf_led.py
@@ -998,7 +998,7 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer):
         self.dropout = tf.keras.layers.Dropout(dropout)
         self.head_dim = embed_dim // num_heads
         assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/longformer/modeling_tf_longformer.py
@@ -405,13 +405,10 @@ def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_se
     else:
         # last token is separation token and should not be counted and in the middle are two separation tokens
         question_end_index = tf.tile(question_end_index + 1, (1, input_ids_shape[1]))
-        attention_mask = (
-            tf.cast(
-                attention_mask > question_end_index,
-                dtype=question_end_index.dtype,
-            )
-            * tf.cast(attention_mask < input_ids_shape[-1], dtype=question_end_index.dtype)
-        )
+        attention_mask = tf.cast(
+            attention_mask > question_end_index,
+            dtype=question_end_index.dtype,
+        ) * tf.cast(attention_mask < input_ids_shape[-1], dtype=question_end_index.dtype)

         return attention_mask
src/transformers/models/m2m_100/modeling_m2m_100.py
@@ -217,7 +217,7 @@ class M2M100Attention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/marian/modeling_marian.py
@@ -163,7 +163,7 @@ class MarianAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/marian/modeling_tf_marian.py
@@ -194,7 +194,7 @@ class TFMarianAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/mbart/modeling_mbart.py
@@ -152,7 +152,7 @@ class MBartAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/mbart/modeling_tf_mbart.py
@@ -154,7 +154,7 @@ class TFMBartAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/pegasus/modeling_pegasus.py
@@ -163,7 +163,7 @@ class PegasusAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/pegasus/modeling_tf_pegasus.py
@@ -195,7 +195,7 @@ class TFPegasusAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/perceiver/modeling_perceiver.py
@@ -824,7 +824,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
         ...     project_pos_dim=256,
         ...     trainable_position_encoding_kwargs=dict(
         ...         num_channels=256,
-        ...         index_dims=config.image_size ** 2,
+        ...         index_dims=config.image_size**2,
         ...     ),
         ... )
@@ -1205,7 +1205,7 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size ** 2)
+        trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size**2)
         trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)

         self.num_labels = config.num_labels
@@ -2485,7 +2485,7 @@ def space_to_depth(frames: torch.Tensor, temporal_block_size: int = 1, spatial_b
             batch_size,
             height // spatial_block_size,
             width // spatial_block_size,
-            (spatial_block_size ** 2) * num_channels,
+            (spatial_block_size**2) * num_channels,
         )
         return frames
     elif len(frames.shape) == 5:
@@ -2509,7 +2509,7 @@ def space_to_depth(frames: torch.Tensor, temporal_block_size: int = 1, spatial_b
             time // temporal_block_size,
             height // spatial_block_size,
             width // spatial_block_size,
-            temporal_block_size * (spatial_block_size ** 2) * num_channels,
+            temporal_block_size * (spatial_block_size**2) * num_channels,
         )
         return frames
     else:
@@ -3059,7 +3059,7 @@ class PerceiverImagePreprocessor(AbstractPreprocessor):
         if self.conv_after_patching:
             inp_dim = self.out_channels
         else:
-            inp_dim = self.in_channels * self.spatial_downsample ** 2
+            inp_dim = self.in_channels * self.spatial_downsample**2

         if is_temporal:
             inp_dim *= self.temporal_downsample
src/transformers/models/perceiver/tokenization_perceiver.py
@@ -87,7 +87,7 @@ class PerceiverTokenizer(PreTrainedTokenizer):
             **kwargs,
         )
-        self._utf_vocab_size = 2 ** 8  # utf is 8 bits
+        self._utf_vocab_size = 2**8  # utf is 8 bits

         # define special tokens dict
         self.special_tokens_encoder: Dict[str, int] = {
src/transformers/models/prophetnet/modeling_prophetnet.py
@@ -674,7 +674,7 @@ class ProphetNetAttention(nn.Module):
         ], f"Size of hidden states should be {batch_size, tgt_len, hidden_size}, but is {hidden_states.size()}"

         # previous time steps are cached - no need to recompute key and value if they are static
-        query_states = self.query_proj(hidden_states) / (self.head_dim ** 0.5)
+        query_states = self.query_proj(hidden_states) / (self.head_dim**0.5)

         if is_cross_attention and past_key_value is not None:
             # reuse k,v, cross_attentions
@@ -855,7 +855,7 @@ class ProphetNetNgramSelfAttention(nn.Module):
         value_states = self.value_proj(hidden_states)

         # normalize
-        query_states = query_states / (self.head_dim ** 0.5)
+        query_states = query_states / (self.head_dim**0.5)

         # reshape
         query_states = self._shape(query_states, ngram_sequence_length, batch_size)
src/transformers/models/reformer/modeling_reformer.py
@@ -700,7 +700,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
         # `num_buckets` should be set to 2 * sequence_length // chunk_length as recommended in paper
         num_buckets_pow_2 = (2 * (sequence_length // self.chunk_length)).bit_length() - 1
         # make sure buckets are power of 2
-        num_buckets = 2 ** num_buckets_pow_2
+        num_buckets = 2**num_buckets_pow_2

         # factorize `num_buckets` if `num_buckets` becomes too large
         num_buckets_limit = 2 * max(
@@ -966,7 +966,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
         """
        length normalization
        """
-        variance = torch.mean(x ** 2, -1, keepdim=True)
+        variance = torch.mean(x**2, -1, keepdim=True)
         norm_x = x * torch.rsqrt(variance + epsilon)
         return norm_x
src/transformers/models/roberta/tokenization_roberta.py
@@ -77,10 +77,10 @@ def bytes_to_unicode():
     )
     cs = bs[:]
     n = 0
-    for b in range(2 ** 8):
+    for b in range(2**8):
         if b not in bs:
             bs.append(b)
-            cs.append(2 ** 8 + n)
+            cs.append(2**8 + n)
             n += 1
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
src/transformers/models/sew/modeling_sew.py
@@ -420,7 +420,7 @@ class SEWAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py
@@ -86,7 +86,7 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
         Get mel-filter bank features using TorchAudio. Note that TorchAudio requires 16-bit signed integers as inputs
         and hence the waveform should not be normalized before feature extraction.
         """
-        waveform = waveform * (2 ** 15)  # Kaldi compliance: 16-bit signed integers
+        waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integers
         waveform = torch.from_numpy(waveform).unsqueeze(0)
         features = ta_kaldi.fbank(waveform, num_mel_bins=self.num_mel_bins, sample_frequency=self.sampling_rate)
         return features.numpy()
src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -230,7 +230,7 @@ class Speech2TextAttention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -256,7 +256,7 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py
@@ -170,7 +170,7 @@ class Speech2Text2Attention(nn.Module):
                 f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                 f" and `num_heads`: {num_heads})."
             )
-        self.scaling = self.head_dim ** -0.5
+        self.scaling = self.head_dim**-0.5
         self.is_decoder = is_decoder

         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)