chenpangpang / transformers / Commits / 611d3a09

Unverified commit 611d3a09, authored Mar 15, 2022 by Minh Chien Vu, committed via GitHub on Mar 15, 2022.

Change unpacking of TF inputs: layoutlm, mpnet, rag, and roformer (#16112)

Co-authored-by: ChienVM <chien_vm@detomo.co.jp>
Parent: 0d7322c1

Showing 4 changed files with 223 additions and 626 deletions (+223 -626).
src/transformers/models/layoutlm/modeling_tf_layoutlm.py   +45  -129
src/transformers/models/mpnet/modeling_tf_mpnet.py         +64  -171
src/transformers/models/rag/modeling_tf_rag.py             +42  -129
src/transformers/models/roformer/modeling_tf_roformer.py   +72  -197
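The whole commit follows one pattern: instead of each `call` routing its arguments through `input_processing` and then reading them back from an `inputs` dict, the method is wrapped in the `@unpack_inputs` decorator and the body works directly with plain keyword arguments. The following is only a rough, self-contained sketch of that idea, not the real decorator; `unpack_inputs_sketch` and `ToyLayer` are made up for illustration, and the actual `unpack_inputs` in `modeling_tf_utils.py` also handles tuple inputs, config-derived booleans, and more.

```python
import functools
import tensorflow as tf


def unpack_inputs_sketch(call):
    """Toy stand-in for transformers' `unpack_inputs`: normalize a dict passed
    positionally into plain keyword arguments before invoking `call`."""
    @functools.wraps(call)
    def wrapper(self, *args, **kwargs):
        if args and isinstance(args[0], dict):
            # a dict passed as the first positional argument becomes kwargs
            kwargs = {**args[0], **kwargs}
            args = args[1:]
        return call(self, *args, **kwargs)
    return wrapper


class ToyLayer(tf.keras.layers.Layer):
    @unpack_inputs_sketch
    def call(self, input_ids=None, attention_mask=None, training=False):
        # the body sees plain Python variables, no `inputs["..."]` lookups
        if attention_mask is None:
            attention_mask = tf.fill(tf.shape(input_ids), 1)
        return tf.cast(input_ids, tf.float32) * tf.cast(attention_mask, tf.float32)


layer = ToyLayer()
print(layer.call({"input_ids": tf.constant([[1, 2, 3]])}))  # dict input
print(layer.call(input_ids=tf.constant([[1, 2, 3]])))       # keyword input
```

The net effect visible in every file below: the `**kwargs`/`kwargs_call` plumbing and the large `inputs = input_processing(...)` blocks disappear, and every `inputs["x"]` lookup becomes the plain argument `x`.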
src/transformers/models/layoutlm/modeling_tf_layoutlm.py  (View file @ 611d3a09)

@@ -37,8 +37,8 @@ from ...modeling_tf_utils import (
     TFSequenceClassificationLoss,
     TFTokenClassificationLoss,
     get_initializer,
-    input_processing,
     keras_serializable,
+    unpack_inputs,
 )
 from ...tf_utils import shape_list
 from ...utils import logging

@@ -691,6 +691,7 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError

+    @unpack_inputs
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,

@@ -708,47 +709,31 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer):
         training: bool = False,
-        **kwargs,
     ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            bbox=bbox,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
+        if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
-        elif inputs["inputs_embeds"] is not None:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+        elif input_ids is not None:
+            input_shape = shape_list(input_ids)
+        elif inputs_embeds is not None:
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.fill(dims=input_shape, value=1)
+        if attention_mask is None:
+            attention_mask = tf.fill(dims=input_shape, value=1)

-        if inputs["token_type_ids"] is None:
-            inputs["token_type_ids"] = tf.fill(dims=input_shape, value=0)
+        if token_type_ids is None:
+            token_type_ids = tf.fill(dims=input_shape, value=0)

-        if inputs["bbox"] is None:
-            inputs["bbox"] = tf.fill(dims=input_shape + [4], value=0)
+        if bbox is None:
+            bbox = tf.fill(dims=input_shape + [4], value=0)

         embedding_output = self.embeddings(
-            input_ids=inputs["input_ids"],
-            bbox=inputs["bbox"],
-            position_ids=inputs["position_ids"],
-            token_type_ids=inputs["token_type_ids"],
-            inputs_embeds=inputs["inputs_embeds"],
-            training=inputs["training"],
+            input_ids=input_ids,
+            bbox=bbox,
+            position_ids=position_ids,
+            token_type_ids=token_type_ids,
+            inputs_embeds=inputs_embeds,
+            training=training,
         )

         # We create a 3D attention mask from a 2D tensor mask.

@@ -756,7 +741,7 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer):
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = tf.reshape(inputs["attention_mask"], (input_shape[0], 1, 1, input_shape[1]))
+        extended_attention_mask = tf.reshape(attention_mask, (input_shape[0], 1, 1, input_shape[1]))

         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
         # masked positions, this operation will create a tensor which is 0.0 for

@@ -773,30 +758,30 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer):
         # attention_probs has shape bsz x n_heads x N x N
         # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
         # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
-        if inputs["head_mask"] is not None:
+        if head_mask is not None:
             raise NotImplementedError
         else:
-            inputs["head_mask"] = [None] * self.config.num_hidden_layers
+            head_mask = [None] * self.config.num_hidden_layers

         encoder_outputs = self.encoder(
             hidden_states=embedding_output,
             attention_mask=extended_attention_mask,
-            head_mask=inputs["head_mask"],
+            head_mask=head_mask,
             # Need to pass these required positional arguments to `Encoder`
             encoder_hidden_states=encoder_hidden_states,
             encoder_attention_mask=None,
             past_key_values=None,
             use_cache=False,
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )

         sequence_output = encoder_outputs[0]
         pooled_output = self.pooler(hidden_states=sequence_output) if self.pooler is not None else None

-        if not inputs["return_dict"]:
+        if not return_dict:
             return (
                 sequence_output,
                 pooled_output,

@@ -924,6 +909,7 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
         self.layoutlm = TFLayoutLMMainLayer(config, name="layoutlm")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFBaseModelOutputWithPoolingAndCrossAttentions, config_class=_CONFIG_FOR_DOC)

@@ -979,9 +965,7 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
         >>> last_hidden_states = outputs.last_hidden_state
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.layoutlm(
             input_ids=input_ids,
             bbox=bbox,
             attention_mask=attention_mask,

@@ -989,26 +973,10 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
             position_ids=position_ids,
             head_mask=head_mask,
             inputs_embeds=inputs_embeds,
             encoder_hidden_states=encoder_hidden_states,
             encoder_attention_mask=encoder_attention_mask,
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.layoutlm(
-            input_ids=inputs["input_ids"],
-            bbox=inputs["bbox"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )

         return outputs

@@ -1064,6 +1032,7 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
         warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
         return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -1127,9 +1096,7 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
         >>> loss = outputs.loss
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.layoutlm(
             input_ids=input_ids,
             bbox=bbox,
             attention_mask=attention_mask,

@@ -1140,32 +1107,13 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.layoutlm(
-            input_ids=inputs["input_ids"],
-            bbox=inputs["bbox"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
-        prediction_scores = self.mlm(sequence_output=sequence_output, training=inputs["training"])
-        loss = (
-            None
-            if inputs["labels"] is None
-            else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
-        )
+        prediction_scores = self.mlm(sequence_output=sequence_output, training=training)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=prediction_scores)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -1208,6 +1156,7 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
             name="classifier",
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -1271,9 +1220,7 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
         >>> loss = outputs.loss
         >>> logits = outputs.logits
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.layoutlm(
             input_ids=input_ids,
             bbox=bbox,
             attention_mask=attention_mask,

@@ -1284,29 +1231,14 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.layoutlm(
-            input_ids=inputs["input_ids"],
-            bbox=inputs["bbox"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         pooled_output = outputs[1]
-        pooled_output = self.dropout(inputs=pooled_output, training=inputs["training"])
+        pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=pooled_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -1355,6 +1287,7 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
             name="classifier",
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -1416,9 +1349,7 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
         >>> loss = outputs.loss
         >>> logits = outputs.logits
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.layoutlm(
             input_ids=input_ids,
             bbox=bbox,
             attention_mask=attention_mask,

@@ -1429,29 +1360,14 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.layoutlm(
-            input_ids=inputs["input_ids"],
-            bbox=inputs["bbox"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
-        sequence_output = self.dropout(inputs=sequence_output, training=inputs["training"])
+        sequence_output = self.dropout(inputs=sequence_output, training=training)
         logits = self.classifier(inputs=sequence_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
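For reference, the defaulting that `TFLayoutLMMainLayer.call` keeps doing after this change (now on plain arguments instead of `inputs[...]` lookups) can be exercised in isolation. This is a minimal sketch; `default_layoutlm_inputs` is a hypothetical helper written for illustration, and the real layer uses `shape_list` and also builds position ids and the embeddings themselves.

```python
import tensorflow as tf


def default_layoutlm_inputs(input_ids, attention_mask=None, token_type_ids=None, bbox=None):
    """Sketch of the defaulting in TFLayoutLMMainLayer.call above:
    missing mask/type/bbox tensors are filled to match `input_ids`."""
    input_shape = input_ids.shape.as_list()            # e.g. [batch, seq_len]
    if attention_mask is None:
        attention_mask = tf.fill(dims=input_shape, value=1)
    if token_type_ids is None:
        token_type_ids = tf.fill(dims=input_shape, value=0)
    if bbox is None:
        # LayoutLM expects one 4-coordinate bounding box per token
        bbox = tf.fill(dims=input_shape + [4], value=0)
    return attention_mask, token_type_ids, bbox


ids = tf.constant([[101, 7592, 102]])
mask, types, boxes = default_layoutlm_inputs(ids)
print(mask.shape, types.shape, boxes.shape)  # (1, 3) (1, 3) (1, 3, 4)
```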
src/transformers/models/mpnet/modeling_tf_mpnet.py  (View file @ 611d3a09)

@@ -45,8 +45,8 @@ from ...modeling_tf_utils import (
     TFSequenceClassificationLoss,
     TFTokenClassificationLoss,
     get_initializer,
-    input_processing,
     keras_serializable,
+    unpack_inputs,
 )
 from ...tf_utils import shape_list
 from ...utils import logging

@@ -485,6 +485,7 @@ class TFMPNetMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError

+    @unpack_inputs
     def call(
         self,
         input_ids=None,

@@ -498,38 +499,24 @@ class TFMPNetMainLayer(tf.keras.layers.Layer):
         training=False,
-        **kwargs,
     ):
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
+        if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
-        elif inputs["inputs_embeds"] is not None:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+        elif input_ids is not None:
+            input_shape = shape_list(input_ids)
+        elif inputs_embeds is not None:
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.fill(input_shape, 1)
+        if attention_mask is None:
+            attention_mask = tf.fill(input_shape, 1)

         embedding_output = self.embeddings(
-            inputs["input_ids"],
-            inputs["position_ids"],
-            inputs["inputs_embeds"],
-            training=inputs["training"],
+            input_ids,
+            position_ids,
+            inputs_embeds,
+            training=training,
         )

         # We create a 3D attention mask from a 2D tensor mask.

@@ -537,7 +524,7 @@ class TFMPNetMainLayer(tf.keras.layers.Layer):
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = tf.reshape(inputs["attention_mask"], (input_shape[0], 1, 1, input_shape[1]))
+        extended_attention_mask = tf.reshape(attention_mask, (input_shape[0], 1, 1, input_shape[1]))

         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
         # masked positions, this operation will create a tensor which is 0.0 for

@@ -554,25 +541,25 @@ class TFMPNetMainLayer(tf.keras.layers.Layer):
         # attention_probs has shape bsz x n_heads x N x N
         # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
         # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
-        if inputs["head_mask"] is not None:
+        if head_mask is not None:
             raise NotImplementedError
         else:
-            inputs["head_mask"] = [None] * self.num_hidden_layers
+            head_mask = [None] * self.num_hidden_layers

         encoder_outputs = self.encoder(
             embedding_output,
             extended_attention_mask,
-            inputs["head_mask"],
-            inputs["output_attentions"],
-            inputs["output_hidden_states"],
-            inputs["return_dict"],
-            training=inputs["training"],
+            head_mask,
+            output_attentions,
+            output_hidden_states,
+            return_dict,
+            training=training,
         )

         sequence_output = encoder_outputs[0]
         pooled_output = self.pooler(sequence_output)

-        if not inputs["return_dict"]:
+        if not return_dict:
             return (
                 sequence_output,
                 pooled_output,

@@ -680,6 +667,7 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.mpnet = TFMPNetMainLayer(config, name="mpnet")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -700,9 +688,7 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
         training=False,
-        **kwargs,
     ):
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.mpnet(
             input_ids=input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,

@@ -712,18 +698,6 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.mpnet(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         return outputs

@@ -809,6 +783,7 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
         warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
         return self.name + "/" + self.lm_head.name

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -836,11 +811,8 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
             config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
             loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
+        outputs = self.mpnet(
+            input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,
             head_mask=head_mask,

@@ -848,27 +820,14 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.mpnet(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
         prediction_scores = self.lm_head(sequence_output)

-        loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], prediction_scores)
+        loss = None if labels is None else self.hf_compute_loss(labels, prediction_scores)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -930,6 +889,7 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
         self.mpnet = TFMPNetMainLayer(config, name="mpnet")
         self.classifier = TFMPNetClassificationHead(config, name="classifier")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -957,11 +917,8 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
+        outputs = self.mpnet(
+            input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,
             head_mask=head_mask,

@@ -969,28 +926,15 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.mpnet(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
         logits = self.classifier(sequence_output, training=training)

-        loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], logits)
+        loss = None if labels is None else self.hf_compute_loss(labels, logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -1036,6 +980,7 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1062,59 +1007,39 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
             where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            labels=labels,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None:
-            num_choices = shape_list(inputs["input_ids"])[1]
-            seq_length = shape_list(inputs["input_ids"])[2]
+        if input_ids is not None:
+            num_choices = shape_list(input_ids)[1]
+            seq_length = shape_list(input_ids)[2]
         else:
-            num_choices = shape_list(inputs["inputs_embeds"])[1]
-            seq_length = shape_list(inputs["inputs_embeds"])[2]
+            num_choices = shape_list(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[2]

-        flat_input_ids = tf.reshape(inputs["input_ids"], (-1, seq_length)) if inputs["input_ids"] is not None else None
-        flat_attention_mask = (
-            tf.reshape(inputs["attention_mask"], (-1, seq_length)) if inputs["attention_mask"] is not None else None
-        )
-        flat_position_ids = (
-            tf.reshape(inputs["position_ids"], (-1, seq_length)) if inputs["position_ids"] is not None else None
-        )
+        flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
+        flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
+        flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
         flat_inputs_embeds = (
-            tf.reshape(inputs["inputs_embeds"], (-1, seq_length, shape_list(inputs["inputs_embeds"])[3]))
-            if inputs["inputs_embeds"] is not None
+            tf.reshape(inputs_embeds, (-1, seq_length, shape_list(inputs_embeds)[3]))
+            if inputs_embeds is not None
             else None
         )
         outputs = self.mpnet(
             flat_input_ids,
             flat_attention_mask,
             flat_position_ids,
-            inputs["head_mask"],
+            head_mask,
             flat_inputs_embeds,
-            inputs["output_attentions"],
-            inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            output_attentions,
+            output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )
         pooled_output = outputs[1]

-        pooled_output = self.dropout(pooled_output, training=inputs["training"])
+        pooled_output = self.dropout(pooled_output, training=training)
         logits = self.classifier(pooled_output)
         reshaped_logits = tf.reshape(logits, (-1, num_choices))

-        loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], reshaped_logits)
+        loss = None if labels is None else self.hf_compute_loss(labels, reshaped_logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (reshaped_logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -1167,6 +1092,7 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1192,10 +1118,7 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.mpnet(
             input_ids=input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,

@@ -1204,29 +1127,16 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.mpnet(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]

-        sequence_output = self.dropout(sequence_output, training=inputs["training"])
+        sequence_output = self.dropout(sequence_output, training=training)
         logits = self.classifier(sequence_output)

-        loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], logits)
+        loss = None if labels is None else self.hf_compute_loss(labels, logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[1:]
             return ((loss,) + output) if loss is not None else output

@@ -1265,6 +1175,7 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1297,11 +1208,8 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
             Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
             are not taken into account for computing the loss.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
+        outputs = self.mpnet(
+            input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,
             head_mask=head_mask,

@@ -1309,21 +1217,7 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            start_positions=start_positions,
-            end_positions=end_positions,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.mpnet(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]

@@ -1333,12 +1227,11 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
         end_logits = tf.squeeze(end_logits, axis=-1)
         loss = None

-        if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
-            labels = {"start_position": inputs["start_positions"]}
-            labels["end_position"] = inputs["end_positions"]
+        if start_positions is not None and end_positions is not None:
+            labels = {"start_position": start_positions, "end_position": end_positions}
             loss = self.hf_compute_loss(labels, (start_logits, end_logits))

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (start_logits, end_logits) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
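The multiple-choice head above relies on folding the choice dimension into the batch before calling the shared encoder and unfolding it again before the loss. Below is a small sketch of just that reshaping; `flatten_choices` and `fake_logits` are illustrative stand-ins for the encoder and classifier, not library API.

```python
import tensorflow as tf


def flatten_choices(input_ids):
    """Sketch of the reshaping in TFMPNetForMultipleChoice.call above:
    fold the choice dimension into the batch before the encoder, then
    unfold the per-choice scores afterwards."""
    num_choices = input_ids.shape[1]
    seq_length = input_ids.shape[2]
    flat_input_ids = tf.reshape(input_ids, (-1, seq_length))      # (batch * choices, seq)
    # ... the flat batch would be fed through self.mpnet here ...
    fake_logits = tf.zeros((flat_input_ids.shape[0], 1))          # stand-in for classifier output
    reshaped_logits = tf.reshape(fake_logits, (-1, num_choices))  # (batch, choices)
    return flat_input_ids, reshaped_logits


ids = tf.zeros((2, 4, 16), dtype=tf.int32)   # batch=2, num_choices=4, seq_len=16
flat, scores = flatten_choices(ids)
print(flat.shape, scores.shape)              # (8, 16) (2, 4)
```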
src/transformers/models/rag/modeling_tf_rag.py  (View file @ 611d3a09)

@@ -23,7 +23,7 @@ import tensorflow as tf
 from ...configuration_utils import PretrainedConfig
 from ...file_utils import ModelOutput, add_start_docstrings_to_model_forward, replace_return_docstrings
-from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, input_processing, shape_list
+from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, shape_list, unpack_inputs
 from ...utils import logging
 from .configuration_rag import RagConfig
 from .retrieval_rag import RagRetriever

@@ -532,6 +532,7 @@ class TFRagModel(TFRagPreTrainedModel):
     def set_retriever(self, retriever: RagRetriever):
         self.retriever = retriever

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFRetrievAugLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -580,46 +581,8 @@ class TFRagModel(TFRagPreTrainedModel):
             "decoder_cached_states" not in kwargs
         ), "Please use past_key_values to cache intermediate outputs"  # from modeling_tf_bart.py

-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            decoder_input_ids=decoder_input_ids,
-            decoder_attention_mask=decoder_attention_mask,
-            encoder_outputs=encoder_outputs,
-            past_key_values=past_key_values,
-            doc_scores=doc_scores,
-            context_input_ids=context_input_ids,
-            context_attention_mask=context_attention_mask,
-            use_cache=use_cache,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            output_retrieved=output_retrieved,
-            return_dict=return_dict,
-            n_docs=n_docs,
-            training=training,
-            kwargs_call=kwargs,
-        )
-
-        # aliasing to minimize code changing
-        input_ids = inputs["input_ids"]
-        attention_mask = inputs["attention_mask"]
-        decoder_input_ids = inputs["decoder_input_ids"]
-        decoder_attention_mask = inputs["decoder_attention_mask"]
-        encoder_outputs = inputs["encoder_outputs"]
-        past_key_values = inputs["past_key_values"]
-        doc_scores = inputs["doc_scores"]
-        context_input_ids = inputs["context_input_ids"]
-        context_attention_mask = inputs["context_attention_mask"]
-
-        use_cache = inputs["use_cache"]
-        output_attentions = inputs["output_attentions"]
-        output_hidden_states = inputs["output_hidden_states"]
-        return_dict = inputs["return_dict"]
-        n_docs = inputs["n_docs"] if inputs["n_docs"] is not None else self.config.n_docs
-        output_retrieved = inputs["output_retrieved"]
-        training = inputs["training"]
+        n_docs = n_docs if n_docs is not None else self.config.n_docs

         # whether retriever has to be used
         has_to_retrieve = (

@@ -855,6 +818,7 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
         log_prob_sum = seq_logprobs + doc_logprobs
         return tf.reduce_logsumexp(log_prob_sum, axis=1)

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -948,72 +912,47 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
             "decoder_cached_states" not in kwargs
         ), "Please use past_key_values to cache intermediate outputs"  # from modeling_tf_bart.py

-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            decoder_input_ids=decoder_input_ids,
-            decoder_attention_mask=decoder_attention_mask,
-            encoder_outputs=encoder_outputs,
-            past_key_values=past_key_values,
-            doc_scores=doc_scores,
-            context_input_ids=context_input_ids,
-            context_attention_mask=context_attention_mask,
-            use_cache=use_cache,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            output_retrieved=output_retrieved,
-            n_docs=n_docs,
-            do_marginalize=do_marginalize,
-            labels=labels,
-            reduce_loss=reduce_loss,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )
-
-        inputs["do_marginalize"] = inputs["do_marginalize"] if inputs["do_marginalize"] else self.config.do_marginalize
-        inputs["reduce_loss"] = inputs["reduce_loss"] if inputs["reduce_loss"] else self.config.reduce_loss
+        do_marginalize = do_marginalize if do_marginalize else self.config.do_marginalize
+        reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss

-        if inputs["labels"] is not None:
-            if inputs["decoder_input_ids"] is None:
-                inputs["decoder_input_ids"] = inputs["labels"]
-            inputs["use_cache"] = False
+        if labels is not None:
+            if decoder_input_ids is None:
+                decoder_input_ids = labels
+            use_cache = False

         outputs = self.rag(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            encoder_outputs=inputs["encoder_outputs"],
-            decoder_input_ids=inputs["decoder_input_ids"],
-            decoder_attention_mask=inputs["decoder_attention_mask"],
-            context_input_ids=inputs["context_input_ids"],
-            context_attention_mask=inputs["context_attention_mask"],
-            doc_scores=inputs["doc_scores"],
-            past_key_values=inputs["past_key_values"],
-            use_cache=inputs["use_cache"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            output_retrieved=inputs["output_retrieved"],
-            n_docs=inputs["n_docs"],
-            training=inputs["training"],
+            input_ids,
+            attention_mask=attention_mask,
+            encoder_outputs=encoder_outputs,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            context_input_ids=context_input_ids,
+            context_attention_mask=context_attention_mask,
+            doc_scores=doc_scores,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            output_retrieved=output_retrieved,
+            n_docs=n_docs,
+            training=training,
         )

         loss = None
         logits = outputs.logits
-        if inputs["labels"] is not None:
-            assert inputs["decoder_input_ids"] is not None
+        if labels is not None:
+            assert decoder_input_ids is not None
             loss = self.get_nll(
                 outputs.logits,
                 outputs.doc_scores,
-                inputs["labels"],
-                reduce_loss=inputs["reduce_loss"],
+                labels,
+                reduce_loss=reduce_loss,
                 epsilon=self.config.label_smoothing,
-                n_docs=inputs["n_docs"],
+                n_docs=n_docs,
             )

-        if inputs["do_marginalize"]:
-            logits = self.marginalize(logits, outputs.doc_scores, inputs["n_docs"])
+        if do_marginalize:
+            logits = self.marginalize(logits, outputs.doc_scores, n_docs)

         return TFRetrievAugLMMarginOutput(
             loss=loss,

@@ -1465,6 +1404,7 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
     def question_encoder(self):
         return self.rag.question_encoder

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def call(

@@ -1559,68 +1499,41 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
             "decoder_cached_states" not in kwargs
         ), "Please use past_key_values to cache intermediate outputs"  # from modeling_tf_bart.py

-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            decoder_input_ids=decoder_input_ids,
-            decoder_attention_mask=decoder_attention_mask,
-            encoder_outputs=encoder_outputs,
-            past_key_values=past_key_values,
-            doc_scores=doc_scores,
-            context_input_ids=context_input_ids,
-            context_attention_mask=context_attention_mask,
-            use_cache=use_cache,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            output_retrieved=output_retrieved,
-            n_docs=n_docs,
-            exclude_bos_score=exclude_bos_score,
-            labels=labels,
-            reduce_loss=reduce_loss,
-            training=training,
-            return_dict=return_dict,
-            kwargs_call=kwargs,
-        )
-
-        inputs["exclude_bos_score"] = (
-            inputs["exclude_bos_score"] if inputs["exclude_bos_score"] else self.config.exclude_bos_score
-        )
-        inputs["reduce_loss"] = inputs["reduce_loss"] if inputs["reduce_loss"] else self.config.reduce_loss
+        exclude_bos_score = exclude_bos_score if exclude_bos_score else self.config.exclude_bos_score
+        reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss

-        if inputs["labels"] is not None:
-            if inputs["decoder_input_ids"] is None:
-                inputs["decoder_input_ids"] = inputs["labels"]
-            inputs["use_cache"] = False
+        if labels is not None:
+            if decoder_input_ids is None:
+                decoder_input_ids = labels
+            use_cache = False

         outputs = self.rag(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            encoder_outputs=inputs["encoder_outputs"],
-            decoder_input_ids=inputs["decoder_input_ids"],
-            decoder_attention_mask=inputs["decoder_attention_mask"],
-            context_input_ids=inputs["context_input_ids"],
-            context_attention_mask=inputs["context_attention_mask"],
-            doc_scores=inputs["doc_scores"],
-            past_key_values=inputs["past_key_values"],
-            use_cache=inputs["use_cache"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            output_retrieved=inputs["output_retrieved"],
-            n_docs=inputs["n_docs"],
-            training=inputs["training"],
+            input_ids,
+            attention_mask=attention_mask,
+            encoder_outputs=encoder_outputs,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            context_input_ids=context_input_ids,
+            context_attention_mask=context_attention_mask,
+            doc_scores=doc_scores,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            output_retrieved=output_retrieved,
+            n_docs=n_docs,
+            training=training,
         )

         loss = None
-        if inputs["labels"] is not None:
+        if labels is not None:
             loss = self.get_nll(
                 outputs.logits,
                 outputs.doc_scores,
-                inputs["labels"],
-                reduce_loss=inputs["reduce_loss"],
+                labels,
+                reduce_loss=reduce_loss,
                 epsilon=self.config.label_smoothing,
-                n_docs=inputs["n_docs"],
+                n_docs=n_docs,
             )

         return TFRetrievAugLMMarginOutput(
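With `input_processing` gone, the RAG heads keep only the argument-level defaulting: per-call flags fall back to the config values, and `labels` stands in for `decoder_input_ids` while caching is switched off during training. Here is a sketch of that logic under assumed names; `ToyRagConfig` and `resolve_generation_flags` are illustrative, not library API.

```python
from dataclasses import dataclass


@dataclass
class ToyRagConfig:
    # hypothetical stand-in for a few RagConfig defaults
    do_marginalize: bool = False
    reduce_loss: bool = False
    n_docs: int = 5


def resolve_generation_flags(config, do_marginalize=None, reduce_loss=None,
                             n_docs=None, labels=None, decoder_input_ids=None,
                             use_cache=True):
    """Sketch of the per-call defaulting kept by the RAG heads above:
    explicit arguments win, otherwise the config value is used, and
    `labels` doubles as `decoder_input_ids` when training."""
    do_marginalize = do_marginalize if do_marginalize else config.do_marginalize
    reduce_loss = reduce_loss if reduce_loss else config.reduce_loss
    n_docs = n_docs if n_docs is not None else config.n_docs
    if labels is not None:
        if decoder_input_ids is None:
            decoder_input_ids = labels   # teacher forcing on the labels
        use_cache = False                # no incremental decoding while training
    return do_marginalize, reduce_loss, n_docs, decoder_input_ids, use_cache


print(resolve_generation_flags(ToyRagConfig(), labels=[[1, 2, 3]]))
```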
src/transformers/models/roformer/modeling_tf_roformer.py  (View file @ 611d3a09)

@@ -49,8 +49,8 @@ from ...modeling_tf_utils import (
     TFSequenceSummary,
     TFTokenClassificationLoss,
     get_initializer,
-    input_processing,
     keras_serializable,
+    unpack_inputs,
 )
 from ...tf_utils import shape_list
 from ...utils import logging

@@ -602,6 +602,7 @@ class TFRoFormerMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError

+    @unpack_inputs
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,

@@ -615,51 +616,37 @@ class TFRoFormerMainLayer(tf.keras.layers.Layer):
         training: bool = False,
-        **kwargs,
     ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
+        if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
-        elif inputs["inputs_embeds"] is not None:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+        elif input_ids is not None:
+            input_shape = shape_list(input_ids)
+        elif inputs_embeds is not None:
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.fill(dims=input_shape, value=1)
+        if attention_mask is None:
+            attention_mask = tf.fill(dims=input_shape, value=1)

-        if inputs["token_type_ids"] is None:
-            inputs["token_type_ids"] = tf.fill(dims=input_shape, value=0)
+        if token_type_ids is None:
+            token_type_ids = tf.fill(dims=input_shape, value=0)

         embedding_output = self.embeddings(
-            input_ids=inputs["input_ids"],
-            token_type_ids=inputs["token_type_ids"],
-            inputs_embeds=inputs["inputs_embeds"],
-            training=inputs["training"],
+            input_ids=input_ids,
+            token_type_ids=token_type_ids,
+            inputs_embeds=inputs_embeds,
+            training=training,
         )
         if hasattr(self, "embeddings_project"):
-            embedding_output = self.embeddings_project(embedding_output, training=inputs["training"])
+            embedding_output = self.embeddings_project(embedding_output, training=training)

         # We create a 3D attention mask from a 2D tensor mask.
         # Sizes are [batch_size, 1, 1, to_seq_length]
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = tf.reshape(inputs["attention_mask"], (input_shape[0], 1, 1, input_shape[1]))
+        extended_attention_mask = tf.reshape(attention_mask, (input_shape[0], 1, 1, input_shape[1]))

         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
         # masked positions, this operation will create a tensor which is 0.0 for

@@ -676,24 +663,24 @@ class TFRoFormerMainLayer(tf.keras.layers.Layer):
         # attention_probs has shape bsz x n_heads x N x N
         # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
         # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
-        if inputs["head_mask"] is not None:
+        if head_mask is not None:
             raise NotImplementedError
         else:
-            inputs["head_mask"] = [None] * self.config.num_hidden_layers
+            head_mask = [None] * self.config.num_hidden_layers

         encoder_outputs = self.encoder(
             hidden_states=embedding_output,
             attention_mask=extended_attention_mask,
-            head_mask=inputs["head_mask"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )

         sequence_output = encoder_outputs[0]

-        if not inputs["return_dict"]:
+        if not return_dict:
             return (sequence_output,) + encoder_outputs[1:]

         return TFBaseModelOutput(

@@ -811,6 +798,7 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel):
         self.roformer = TFRoFormerMainLayer(config, name="roformer")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -831,9 +819,7 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel):
         training: Optional[bool] = False,
-        **kwargs,
     ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -843,18 +829,6 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )

         return outputs

@@ -883,6 +857,7 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL
     def get_lm_head(self) -> tf.keras.layers.Layer:
         return self.mlm.predictions

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -910,9 +885,7 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL
             config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
             loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -921,30 +894,13 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
-        prediction_scores = self.mlm(sequence_output=sequence_output, training=inputs["training"])
-        loss = (
-            None
-            if inputs["labels"] is None
-            else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
-        )
+        prediction_scores = self.mlm(sequence_output=sequence_output, training=training)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=prediction_scores)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -978,6 +934,7 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL
     def get_lm_head(self) -> tf.keras.layers.Layer:
         return self.mlm.predictions

+    @unpack_inputs
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,

@@ -1003,9 +960,7 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL
             Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
             config.vocab_size - 1]`.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -1014,32 +969,19 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
-        logits = self.mlm(sequence_output=sequence_output, training=inputs["training"])
+        logits = self.mlm(sequence_output=sequence_output, training=training)
         loss = None

-        if inputs["labels"] is not None:
+        if labels is not None:
             # shift labels to the left and cut last logit token
             shifted_logits = logits[:, :-1]
-            labels = inputs["labels"][:, 1:]
+            labels = labels[:, 1:]
             loss = self.hf_compute_loss(labels=labels, logits=shifted_logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output

@@ -1102,6 +1044,7 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC
         self.roformer = TFRoFormerMainLayer(config, name="roformer")
         self.classifier = TFRoFormerClassificationHead(config, name="classifier")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1129,9 +1072,7 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -1140,25 +1081,12 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
-        logits = self.classifier(hidden_states=outputs[0], training=inputs["training"])
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        logits = self.classifier(hidden_states=outputs[0], training=training)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[1:]
             return ((loss,) + output) if loss is not None else output

@@ -1205,6 +1133,7 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(
         ROFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
     )

@@ -1233,66 +1162,42 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
             where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            labels=labels,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None:
-            num_choices = shape_list(inputs["input_ids"])[1]
-            seq_length = shape_list(inputs["input_ids"])[2]
+        if input_ids is not None:
+            num_choices = shape_list(input_ids)[1]
+            seq_length = shape_list(input_ids)[2]
         else:
-            num_choices = shape_list(inputs["inputs_embeds"])[1]
-            seq_length = shape_list(inputs["inputs_embeds"])[2]
+            num_choices = shape_list(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[2]

-        flat_input_ids = (
-            tf.reshape(tensor=inputs["input_ids"], shape=(-1, seq_length)) if inputs["input_ids"] is not None else None
-        )
+        flat_input_ids = tf.reshape(tensor=input_ids, shape=(-1, seq_length)) if input_ids is not None else None
         flat_attention_mask = (
-            tf.reshape(tensor=inputs["attention_mask"], shape=(-1, seq_length))
-            if inputs["attention_mask"] is not None
-            else None
+            tf.reshape(tensor=attention_mask, shape=(-1, seq_length)) if attention_mask is not None else None
         )
         flat_token_type_ids = (
-            tf.reshape(tensor=inputs["token_type_ids"], shape=(-1, seq_length))
-            if inputs["token_type_ids"] is not None
-            else None
+            tf.reshape(tensor=token_type_ids, shape=(-1, seq_length)) if token_type_ids is not None else None
         )
         flat_inputs_embeds = (
-            tf.reshape(tensor=inputs["inputs_embeds"], shape=(-1, seq_length, shape_list(inputs["inputs_embeds"])[3]))
-            if inputs["inputs_embeds"] is not None
+            tf.reshape(tensor=inputs_embeds, shape=(-1, seq_length, shape_list(inputs_embeds)[3]))
+            if inputs_embeds is not None
             else None
         )
         outputs = self.roformer(
             input_ids=flat_input_ids,
             attention_mask=flat_attention_mask,
             token_type_ids=flat_token_type_ids,
-            head_mask=inputs["head_mask"],
+            head_mask=head_mask,
             inputs_embeds=flat_inputs_embeds,
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )
-        logits = self.sequence_summary(inputs=outputs[0], training=inputs["training"])
+        logits = self.sequence_summary(inputs=outputs[0], training=training)
         logits = self.classifier(inputs=logits)
         reshaped_logits = tf.reshape(tensor=logits, shape=(-1, num_choices))
-        loss = (
-            None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=reshaped_logits)
-        )
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=reshaped_logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (reshaped_logits,) + outputs[1:]
             return ((loss,) + output) if loss is not None else output

@@ -1344,6 +1249,7 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1369,9 +1275,7 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif
         labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -1380,27 +1284,14 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
-        sequence_output = self.dropout(inputs=sequence_output, training=inputs["training"])
+        sequence_output = self.dropout(inputs=sequence_output, training=training)
         logits = self.classifier(inputs=sequence_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[1:]
             return ((loss,) + output) if loss is not None else output

@@ -1436,6 +1327,7 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,

@@ -1468,9 +1360,7 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
             Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
             are not taken into account for computing the loss.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.roformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,

@@ -1479,21 +1369,7 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            start_positions=start_positions,
-            end_positions=end_positions,
             training=training,
-            kwargs_call=kwargs,
         )
-        outputs = self.roformer(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
-        )
         sequence_output = outputs[0]
         logits = self.qa_outputs(inputs=sequence_output)

@@ -1502,12 +1378,11 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
         end_logits = tf.squeeze(input=end_logits, axis=-1)
         loss = None

-        if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
-            labels = {"start_position": inputs["start_positions"]}
-            labels["end_position"] = inputs["end_positions"]
+        if start_positions is not None and end_positions is not None:
+            labels = {"start_position": start_positions, "end_position": end_positions}
             loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (start_logits, end_logits) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
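One detail worth noting in `TFRoFormerForCausalLM.call` is the label shift for the causal LM loss: the logits at position t predict token t+1, so the last logit step and the first label are dropped before the loss is computed. Below is a minimal sketch of that shift using a plain Keras loss in place of the model's `hf_compute_loss`; `shift_for_causal_loss` is written here for illustration only.

```python
import tensorflow as tf


def shift_for_causal_loss(logits, labels):
    """Sketch of the label shifting in TFRoFormerForCausalLM.call above:
    position t's logits predict token t+1, so drop the last logit step
    and the first label before computing the loss."""
    shifted_logits = logits[:, :-1]      # (batch, seq-1, vocab)
    shifted_labels = labels[:, 1:]       # (batch, seq-1)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    return loss_fn(shifted_labels, shifted_logits)


logits = tf.random.normal((2, 5, 10))                            # batch=2, seq=5, vocab=10
labels = tf.random.uniform((2, 5), maxval=10, dtype=tf.int32)
print(float(shift_for_causal_loss(logits, labels)))
```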