chenpangpang / transformers / Commits

Unverified commit b24ead87, authored Apr 26, 2021 by LSinev, committed by GitHub on Apr 26, 2021

fix some typos in docs, comments, logging/errors (#11432)

parent e3e70f95
Changes: 77
Showing 20 changed files with 38 additions and 38 deletions (+38, -38)
src/transformers/models/lxmert/configuration_lxmert.py  (+2, -2)
src/transformers/models/m2m_100/modeling_m2m_100.py  (+2, -2)
src/transformers/models/marian/modeling_marian.py  (+2, -2)
src/transformers/models/mbart/modeling_mbart.py  (+3, -3)
src/transformers/models/mbart/modeling_tf_mbart.py  (+2, -2)
src/transformers/models/mobilebert/modeling_mobilebert.py  (+1, -1)
src/transformers/models/mpnet/modeling_tf_mpnet.py  (+1, -1)
src/transformers/models/mpnet/tokenization_mpnet_fast.py  (+1, -1)
src/transformers/models/openai/modeling_openai.py  (+6, -6)
src/transformers/models/openai/modeling_tf_openai.py  (+1, -1)
src/transformers/models/pegasus/modeling_pegasus.py  (+2, -2)
src/transformers/models/pegasus/modeling_tf_pegasus.py  (+1, -1)
src/transformers/models/prophetnet/modeling_prophetnet.py  (+4, -4)
src/transformers/models/rag/modeling_tf_rag.py  (+1, -1)
src/transformers/models/rag/retrieval_rag.py  (+2, -2)
src/transformers/models/reformer/modeling_reformer.py  (+1, -1)
src/transformers/models/roberta/tokenization_roberta_fast.py  (+1, -1)
src/transformers/models/speech_to_text/modeling_speech_to_text.py  (+3, -3)
src/transformers/models/t5/modeling_t5.py  (+1, -1)
src/transformers/models/t5/modeling_tf_t5.py  (+1, -1)
src/transformers/models/lxmert/configuration_lxmert.py
@@ -95,9 +95,9 @@ class LxmertConfig(PretrainedConfig):
             Whether or not to add masked language modeling (as used in pretraining models such as BERT) to the loss
             objective.
         task_obj_predict (:obj:`bool`, `optional`, defaults to :obj:`True`):
-            Whether or not to add object prediction, attribute ppredictionand feature regression to the loss objective.
+            Whether or not to add object prediction, attribute prediction and feature regression to the loss objective.
         task_qa (:obj:`bool`, `optional`, defaults to :obj:`True`):
-            Whether or not to add the question-asansweringoss to the objective
+            Whether or not to add the question-answering loss to the objective
         visual_obj_loss (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether or not to calculate the object-prediction loss objective
         visual_attr_loss (:obj:`bool`, `optional`, defaults to :obj:`True`):
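The two options touched here are plain boolean flags on the config. As a quick, hedged illustration (the particular values below are arbitrary, not defaults recommended by the library):

from transformers import LxmertConfig

# Illustrative only: keep object/attribute prediction and the QA loss,
# but drop the visual attribute loss term.
config = LxmertConfig(task_obj_predict=True, task_qa=True, visual_attr_loss=False)
print(config.task_obj_predict, config.task_qa, config.visual_attr_loss)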
src/transformers/models/m2m_100/modeling_m2m_100.py
@@ -306,9 +306,9 @@ class M2M100Attention(nn.Module):
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
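The comment corrected above is doing real work, so a standalone sketch may help: the weights are viewed to 4D for the returned attentions and then viewed back to 3D, which keeps both tensors on the same autograd graph. Toy sizes, plain PyTorch, not the model code itself:

import torch

bsz, num_heads, tgt_len, src_len = 2, 4, 5, 5
attn_weights = torch.rand(bsz * num_heads, tgt_len, src_len, requires_grad=True)

# View to 4D for the caller, then back to 3D for the rest of the forward pass;
# the returned 4D view stays connected to the graph, so it keeps its gradient.
attn_weights_reshaped = attn_weights.view(bsz, num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * num_heads, tgt_len, src_len)

attn_weights.sum().backward()
print(attn_weights_reshaped.grad_fn is not None)  # True: still part of the graph

The same comment fix repeats below in Marian, MBart, Pegasus, ProphetNet and Speech2Text, which copy this attention block.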
src/transformers/models/marian/modeling_marian.py
@@ -252,9 +252,9 @@ class MarianAttention(nn.Module):
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
src/transformers/models/mbart/modeling_mbart.py
@@ -117,7 +117,7 @@ class MBartLearnedPositionalEmbedding(nn.Embedding):
     def __init__(self, num_embeddings: int, embedding_dim: int):
         # MBart is set up so that if padding_idx is specified then offset the embedding ids by 2
-        # and adjust num_embeddings appropriately. Other models dont have this hack
+        # and adjust num_embeddings appropriately. Other models don't have this hack
         self.offset = 2
         super().__init__(num_embeddings + self.offset, embedding_dim)
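Before the second hunk in this file, a short aside on the offset mentioned in the comment: the table is allocated with two extra rows and the position ids are shifted by 2 before the lookup. The class below is a hedged sketch with an illustrative name, not the library's own module:

import torch
import torch.nn as nn

class OffsetPositionalEmbedding(nn.Embedding):  # hypothetical name, for illustration
    def __init__(self, num_embeddings: int, embedding_dim: int):
        self.offset = 2
        super().__init__(num_embeddings + self.offset, embedding_dim)

    def forward(self, positions: torch.Tensor) -> torch.Tensor:
        # shift every position id past the two reserved rows before the lookup
        return super().forward(positions + self.offset)

emb = OffsetPositionalEmbedding(num_embeddings=1024, embedding_dim=16)
positions = torch.arange(8).unsqueeze(0)  # (batch=1, seq_len=8)
print(emb(positions).shape)               # torch.Size([1, 8, 16])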
@@ -243,9 +243,9 @@ class MBartAttention(nn.Module):
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
src/transformers/models/mbart/modeling_tf_mbart.py
@@ -118,7 +118,7 @@ class TFMBartLearnedPositionalEmbedding(TFSharedEmbeddings):
     def __init__(self, num_embeddings: int, embedding_dim: int, **kwargs):
         # MBart is set up so that if padding_idx is specified then offset the embedding ids by 2
-        # and adjust num_embeddings appropriately. Other models dont have this hack
+        # and adjust num_embeddings appropriately. Other models don't have this hack
         self.offset = 2
         super().__init__(num_embeddings + self.offset, embedding_dim, **kwargs)
@@ -690,7 +690,7 @@ class TFMBartEncoder(tf.keras.layers.Layer):
             Mask to nullify selected heads of the attention modules. Mask values selected in ``[0, 1]``:

             - 1 indicates the head is **not masked**,
-            - 0 indicates the heas is **masked**.
+            - 0 indicates the head is **masked**.

         inputs_embeds (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
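For reference, a head mask of the kind this docstring describes is just a tensor of ones and zeros with one entry per layer and head. A hedged sketch with illustrative sizes (plain PyTorch here, whereas the docstring is about the TF model):

import torch

num_layers, num_heads = 12, 16            # illustrative sizes
head_mask = torch.ones(num_layers, num_heads)
head_mask[0, 3] = 0                       # 0 masks head 3 of the first layer, 1 keeps a head
print(head_mask[0, :5])                   # tensor([1., 1., 1., 0., 1.])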
src/transformers/models/mobilebert/modeling_mobilebert.py
@@ -1487,7 +1487,7 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):
 @add_start_docstrings(
     """
-    MoibleBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
+    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
     for Named-Entity-Recognition (NER) tasks.
     """,
     MOBILEBERT_START_DOCSTRING,
src/transformers/models/mpnet/modeling_tf_mpnet.py
@@ -674,7 +674,7 @@ MPNET_INPUTS_DOCSTRING = r"""
 @add_start_docstrings(
-    "The bare MPNet Model transformer outputing raw hidden-states without any specific head on top.",
+    "The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.",
     MPNET_START_DOCSTRING,
 )
 class TFMPNetModel(TFMPNetPreTrainedModel):
src/transformers/models/mpnet/tokenization_mpnet_fast.py
@@ -154,7 +154,7 @@ class MPNetTokenizerFast(PreTrainedTokenizerFast):
         :obj:`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while
         not having been set.

-        MPNet tokenizer has a special mask token to be usble in the fill-mask pipeline. The mask token will greedily
+        MPNet tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
         comprise the space before the `<mask>`.
         """
         if self._mask_token is None and self.verbose:
src/transformers/models/openai/modeling_openai.py
@@ -146,7 +146,7 @@ class Attention(nn.Module):
     def __init__(self, nx, n_ctx, config, scale=False):
         super().__init__()
         n_state = nx  # in Attention: n_state=768 (nx=n_embd)
-        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
+        # [switch nx => n_state from Block to Attention to keep identical to TF implementation]
         assert n_state % config.n_head == 0
         self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
         self.n_head = config.n_head
@@ -178,7 +178,7 @@ class Attention(nn.Module):
         w = torch.matmul(q, k)
         if self.scale:
             w = w / math.sqrt(v.size(-1))
-        # w = w * self.bias + -1e9 * (1 - self.bias)  # TF implem method: mask_attn_weights
+        # w = w * self.bias + -1e9 * (1 - self.bias)  # TF implementation method: mask_attn_weights
         # XD: self.b may be larger than w, so we need to crop it
         b = self.bias[:, :, : w.size(-2), : w.size(-1)]
         w = w * b + -1e4 * (1 - b)
@@ -202,11 +202,11 @@ class Attention(nn.Module):
     def merge_heads(self, x):
         x = x.permute(0, 2, 1, 3).contiguous()
         new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),)
-        return x.view(*new_x_shape)  # in Tensorflow implem: fct merge_states
+        return x.view(*new_x_shape)  # in Tensorflow implementation: fct merge_states

     def split_heads(self, x, k=False):
         new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head)
-        x = x.view(*new_x_shape)  # in Tensorflow implem: fct split_states
+        x = x.view(*new_x_shape)  # in Tensorflow implementation: fct split_states
         if k:
             return x.permute(0, 2, 3, 1)
         else:
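The split_heads/merge_heads helpers whose comments are touched above mirror each other exactly; a self-contained re-derivation with toy sizes (not the module's code) shows the round trip:

import torch

n_head = 4
x = torch.randn(2, 10, 64)                                   # (batch, seq_len, hidden)

# split_heads: (batch, seq_len, hidden) -> (batch, n_head, seq_len, head_dim)
split = x.view(*(x.size()[:-1] + (n_head, x.size(-1) // n_head))).permute(0, 2, 1, 3)
print(split.shape)                                           # torch.Size([2, 4, 10, 16])

# merge_heads: permute back and collapse the last two dimensions again
merged = split.permute(0, 2, 1, 3).contiguous()
merged = merged.view(*(merged.size()[:-2] + (merged.size(-2) * merged.size(-1),)))
print(torch.equal(merged, x))                                # True: exact round trip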
@@ -467,7 +467,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if position_ids is None:
-            # Code is different from when we had a single embedding matrice from position and token embeddings
+            # Code is different from when we had a single embedding matrix from position and token embeddings
             position_ids = self.position_ids[None, : input_shape[-1]]

         # Attention mask.
@@ -814,7 +814,7 @@ class OpenAIGPTForSequenceClassification(OpenAIGPTPreTrainedModel):
                 sequence_lengths = -1
                 logger.warning(
                     f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    f"unexpected if using padding tokens in conjuction with `inputs_embeds.`"
+                    f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                 )

         pooled_logits = logits[range(batch_size), sequence_lengths]
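The warning fixed in the last hunk exists because the pooling step right below it needs the index of the last non-padding token per sequence, which can only be derived from input_ids, not from inputs_embeds. A hedged sketch with made-up values:

import torch

pad_token_id = 0                                   # illustrative pad id
input_ids = torch.tensor([[5, 6, 7, 0, 0],
                          [8, 9, 0, 0, 0]])
logits = torch.randn(2, 5, 3)                      # (batch, seq_len, num_labels)

# Index of the last real token in each row; with only inputs_embeds this cannot
# be computed, so the model falls back to -1 (the last position) and warns.
sequence_lengths = torch.ne(input_ids, pad_token_id).sum(-1) - 1
pooled_logits = logits[range(input_ids.shape[0]), sequence_lengths]
print(sequence_lengths.tolist())                   # [2, 1]
print(pooled_logits.shape)                         # torch.Size([2, 3])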
src/transformers/models/openai/modeling_tf_openai.py
@@ -62,7 +62,7 @@ class TFAttention(tf.keras.layers.Layer):
         super().__init__(**kwargs)

         n_state = nx  # in Attention: n_state=768 (nx=n_embd)
-        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
+        # [switch nx => n_state from Block to Attention to keep identical to TF implementation]
         assert (
             n_state % config.n_head == 0
         ), f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}"
src/transformers/models/pegasus/modeling_pegasus.py
@@ -252,9 +252,9 @@ class PegasusAttention(nn.Module):
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
src/transformers/models/pegasus/modeling_tf_pegasus.py
@@ -719,7 +719,7 @@ class TFPegasusEncoder(tf.keras.layers.Layer):
             Mask to nullify selected heads of the attention modules. Mask values selected in ``[0, 1]``:

             - 1 indicates the head is **not masked**,
-            - 0 indicates the heas is **masked**.
+            - 0 indicates the head is **masked**.

         inputs_embeds (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
src/transformers/models/prophetnet/modeling_prophetnet.py
@@ -723,9 +723,9 @@ class ProphetNetAttention(nn.Module):
             attn_weights = attn_weights + attention_mask

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(batch_size, self.num_attn_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(batch_size * self.num_attn_heads, tgt_len, src_len)
@@ -1243,7 +1243,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
     r"""
     word_embeddings (:obj:`torch.nn.Embeddings` of shape :obj:`(config.vocab_size, config.hidden_size)`, `optional`):
         The word embedding parameters. This can be used to initialize :class:`~transformers.ProphetNetEncoder` with
-        pre-defined word embeddings instead of randomely initialized word embeddings.
+        pre-defined word embeddings instead of randomly initialized word embeddings.
     """

     def __init__(self, config: ProphetNetConfig, word_embeddings: nn.Embedding = None):
@@ -1380,7 +1380,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
     r"""
     word_embeddings (:obj:`torch.nn.Embeddings` of shape :obj:`(config.vocab_size, config.hidden_size)`, `optional`):
         The word embedding parameters. This can be used to initialize :class:`~transformers.ProphetNetEncoder` with
-        pre-defined word embeddings instead of randomely initialized word embeddings.
+        pre-defined word embeddings instead of randomly initialized word embeddings.
     """

     def __init__(self, config: ProphetNetConfig, word_embeddings: nn.Embedding = None):
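Both docstrings corrected here describe the same optional argument, so a small sketch of what gets passed may help; the weight matrix is random for illustration, and the commented constructor call simply mirrors the signature shown in the hunks:

import torch
import torch.nn as nn

vocab_size, hidden_size = 30522, 16                    # illustrative sizes
pretrained_weights = torch.randn(vocab_size, hidden_size)
word_embeddings = nn.Embedding.from_pretrained(pretrained_weights, freeze=False)

# The encoder/decoder constructors above accept this module directly, e.g.:
# encoder = ProphetNetEncoder(config, word_embeddings=word_embeddings)
print(word_embeddings.weight.shape)                    # torch.Size([30522, 16])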
src/transformers/models/rag/modeling_tf_rag.py
@@ -285,7 +285,7 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
         >>> # load retriever
         >>> retriever = RagRetriever.from_pretrained(PATH, index_name="exact", use_dummy_dataset=True)
-        >>> # load fine-tuned model with retriver
+        >>> # load fine-tuned model with retriever
         >>> model = TFRagModel.from_pretrained("./rag", retriever=retriever)
         """
src/transformers/models/rag/retrieval_rag.py
@@ -234,7 +234,7 @@ class CanonicalHFIndex(HFIndexBase):
     Args:
         vector_size (:obj:`int`): the dimension of the passages embeddings used by the index
         dataset_name (:obj:`str`, optional, defaults to ``wiki_dpr``):
-            A datatset identifier of the indexed dataset on HuggingFace AWS bucket (list all available datasets and ids
+            A dataset identifier of the indexed dataset on HuggingFace AWS bucket (list all available datasets and ids
             with ``datasets.list_datasets()``).
         dataset_split (:obj:`str`, optional, defaults to ``train``)
             Which split of the ``dataset`` to load.
@@ -442,7 +442,7 @@ class RagRetriever:
     def init_retrieval(self):
         """
-        Retriever initalization function. It loads the index into memory.
+        Retriever initialization function. It loads the index into memory.
         """

         logger.info("initializing retrieval")
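Tying these hunks together, a hedged usage sketch: load a retriever against the dummy wiki_dpr index, mirroring the docstring example earlier in this commit, then call init_retrieval() to load the index into memory. The checkpoint name is illustrative and the call downloads data:

from transformers import RagRetriever

retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
)
retriever.init_retrieval()  # loads the index into memory, as the docstring says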
src/transformers/models/reformer/modeling_reformer.py
@@ -612,7 +612,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
         if isinstance(self.num_buckets, int):
             assert (
                 self.num_buckets % 2 == 0
-            ), f"There should be an even number of bucktes, but `self.num_bucktes`: {self.num_buckets}"
+            ), f"There should be an even number of buckets, but `self.num_buckets`: {self.num_buckets}"
             rotation_size = self.num_buckets
             num_buckets = self.num_buckets
         else:
src/transformers/models/roberta/tokenization_roberta_fast.py
@@ -179,7 +179,7 @@ class RobertaTokenizerFast(GPT2TokenizerFast):
         :obj:`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while
         not having been set.

-        Roberta tokenizer has a special mask token to be usble in the fill-mask pipeline. The mask token will greedily
+        Roberta tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
         comprise the space before the `<mask>`.
         """
         if self._mask_token is None and self.verbose:
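As the corrected sentence notes, this mask token is what the fill-mask pipeline consumes. A minimal, hedged usage example (model name and prompt are illustrative, and the call downloads weights):

from transformers import pipeline

unmasker = pipeline("fill-mask", model="roberta-base")
predictions = unmasker("The goal of life is <mask>.")
print(predictions[0]["token_str"], predictions[0]["score"])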
src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -319,9 +319,9 @@ class Speech2TextAttention(nn.Module):
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
-            # In order to do so, attn_weights have to reshaped
+            # In order to do so, attn_weights have to be reshaped
             # twice and have to be reused in the following
             attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
@@ -559,7 +559,7 @@ class Speech2TextPreTrainedModel(PreTrainedModel):
         return input_lengths

     def _get_subsampled_encoder_attn_mask(self, attention_mask):
-        # generate creates 3D attention mask, becuase of the shape of input_features
+        # generate creates 3D attention mask, because of the shape of input_features
         # convert it to 2D if thats the case
         if len(attention_mask.shape) > 2:
             attention_mask = attention_mask[:, :, -1]
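The comment fixed in the second hunk describes a small shape fix-up: generate() can hand the encoder a 3D mask shaped like the input features, and keeping only the last slice recovers the usual 2D padding mask. A standalone sketch with illustrative sizes:

import torch

attention_mask = torch.ones(2, 6, 80, dtype=torch.long)  # 3D mask, toy sizes
if len(attention_mask.shape) > 2:
    attention_mask = attention_mask[:, :, -1]             # keep only the last feature slice
print(attention_mask.shape)                               # torch.Size([2, 6])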
src/transformers/models/t5/modeling_t5.py
@@ -1172,7 +1172,7 @@ T5_ENCODER_INPUTS_DOCSTRING = r"""
             Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
 """

-# Warning messafe for FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask
+# Warning message for FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask
 __HEAD_MASK_WARNING_MSG = """
 The input argument `head_mask` was split into two arguments `head_mask` and `decoder_head_mask`. Currently,
 `decoder_head_mask` is set to copy `head_mask`, but this feature is deprecated and will be removed in future versions.
src/transformers/models/t5/modeling_tf_t5.py
@@ -637,7 +637,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
             raise ValueError(f"You have to specify either {err_msg_prefix}inputs or {err_msg_prefix}inputs_embeds")

         if inputs["inputs_embeds"] is None:
-            assert self.embed_tokens is not None, "You have to intialize the model with valid token embeddings"
+            assert self.embed_tokens is not None, "You have to initialize the model with valid token embeddings"
             inputs["inputs_embeds"] = self.embed_tokens(inputs["input_ids"])

         batch_size, seq_length = input_shape