chenpangpang / transformers · Commit 37793259

Unverified commit 37793259, authored Mar 14, 2022 by Joydeep Bhattacharjee, committed via GitHub on Mar 14, 2022

update albert with tf decorator (#16147)
parent e109edf1

Showing 1 changed file with 74 additions and 204 deletions (+74, -204)
src/transformers/models/albert/modeling_tf_albert.py (+74, -204)
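Note: this commit replaces the per-method `input_processing(...)` boilerplate with the `@unpack_inputs` decorator imported from `...modeling_tf_utils`, so each `call` body reads its arguments directly (`input_ids`) instead of through a processed dictionary (`inputs["input_ids"]`). The snippet below is only a rough sketch of what such an argument-normalizing decorator can look like; it is not the actual `unpack_inputs` implementation, and the behaviour described in the comments is an assumption.

import functools
import inspect


def unpack_inputs_sketch(call_fn):
    """Illustrative stand-in for an unpack_inputs-style decorator (assumed behaviour)."""

    @functools.wraps(call_fn)
    def wrapper(self, *args, **kwargs):
        # Map any positional arguments onto the wrapped signature (skipping `self`),
        # then forward everything as plain keyword arguments so the body can use
        # `input_ids`, `attention_mask`, ... directly.
        param_names = list(inspect.signature(call_fn).parameters)[1:]
        kwargs.update(dict(zip(param_names, args)))
        return call_fn(self, **kwargs)

    return wrapper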
@@ -49,8 +49,8 @@ from ...modeling_tf_utils import (
     TFSequenceClassificationLoss,
     TFTokenClassificationLoss,
     get_initializer,
-    input_processing,
     keras_serializable,
+    unpack_inputs,
 )
 from ...tf_utils import shape_list
 from ...utils import logging
@@ -538,6 +538,7 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError

+    @unpack_inputs
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
@@ -552,43 +553,28 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
         training: bool = False,
-        **kwargs,
     ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )

-        if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
+        if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
-        elif inputs["inputs_embeds"] is not None:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+        elif input_ids is not None:
+            input_shape = shape_list(input_ids)
+        elif inputs_embeds is not None:
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.fill(dims=input_shape, value=1)
+        if attention_mask is None:
+            attention_mask = tf.fill(dims=input_shape, value=1)

-        if inputs["token_type_ids"] is None:
-            inputs["token_type_ids"] = tf.fill(dims=input_shape, value=0)
+        if token_type_ids is None:
+            token_type_ids = tf.fill(dims=input_shape, value=0)

         embedding_output = self.embeddings(
-            input_ids=inputs["input_ids"],
-            position_ids=inputs["position_ids"],
-            token_type_ids=inputs["token_type_ids"],
-            inputs_embeds=inputs["inputs_embeds"],
-            training=inputs["training"],
+            input_ids=input_ids,
+            position_ids=position_ids,
+            token_type_ids=token_type_ids,
+            inputs_embeds=inputs_embeds,
+            training=training,
         )

         # We create a 3D attention mask from a 2D tensor mask.
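The defaulting logic above relies on `tf.fill` to build an all-ones attention mask and an all-zeros token-type tensor when the caller supplies neither. A minimal standalone check of that behaviour, with a made-up batch size and sequence length, looks like:

import tensorflow as tf

input_shape = [2, 5]  # hypothetical (batch_size, seq_length)
attention_mask = tf.fill(dims=input_shape, value=1)  # attend to every position
token_type_ids = tf.fill(dims=input_shape, value=0)  # single-segment input
print(attention_mask.shape, token_type_ids.shape)    # (2, 5) (2, 5)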
@@ -596,7 +582,7 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = tf.reshape(inputs["attention_mask"], (input_shape[0], 1, 1, input_shape[1]))
+        extended_attention_mask = tf.reshape(attention_mask, (input_shape[0], 1, 1, input_shape[1]))

         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
         # masked positions, this operation will create a tensor which is 0.0 for
@@ -613,25 +599,25 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
         # attention_probs has shape bsz x n_heads x N x N
         # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
         # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
-        if inputs["head_mask"] is not None:
+        if head_mask is not None:
             raise NotImplementedError
         else:
-            inputs["head_mask"] = [None] * self.config.num_hidden_layers
+            head_mask = [None] * self.config.num_hidden_layers

         encoder_outputs = self.encoder(
             hidden_states=embedding_output,
             attention_mask=extended_attention_mask,
-            head_mask=inputs["head_mask"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )

         sequence_output = encoder_outputs[0]
         pooled_output = self.pooler(inputs=sequence_output[:, 0]) if self.pooler is not None else None

-        if not inputs["return_dict"]:
+        if not return_dict:
             return (
                 sequence_output,
                 pooled_output,
@@ -779,6 +765,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         self.albert = TFAlbertMainLayer(config, name="albert")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -800,9 +787,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         training: Optional[bool] = False,
-        **kwargs,
     ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -813,19 +798,6 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )

         return outputs
@@ -865,6 +837,7 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
     def get_lm_head(self) -> tf.keras.layers.Layer:
         return self.predictions

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
@@ -904,9 +877,7 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
         >>> sop_logits = outputs.sop_logits
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -916,34 +887,19 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
-            sentence_order_label=sentence_order_label,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         sequence_output, pooled_output = outputs[:2]
         prediction_scores = self.predictions(hidden_states=sequence_output)
-        sop_scores = self.sop_classifier(pooled_output=pooled_output, training=inputs["training"])
+        sop_scores = self.sop_classifier(pooled_output=pooled_output, training=training)
         total_loss = None

-        if inputs["labels"] is not None and inputs["sentence_order_label"] is not None:
-            d_labels = {"labels": inputs["labels"]}
-            d_labels["sentence_order_label"] = inputs["sentence_order_label"]
+        if labels is not None and sentence_order_label is not None:
+            d_labels = {"labels": labels}
+            d_labels["sentence_order_label"] = sentence_order_label
             total_loss = self.hf_compute_loss(labels=d_labels, logits=(prediction_scores, sop_scores))

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores, sop_scores) + outputs[2:]
             return ((total_loss,) + output) if total_loss is not None else output
@@ -999,6 +955,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
     def get_lm_head(self) -> tf.keras.layers.Layer:
         return self.predictions

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -1027,9 +984,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
             config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
             loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
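As the `labels` docstring in this hunk notes, positions marked `-100` are excluded from the masked-LM loss. A tiny illustration of such a label tensor (token-id values invented for the example) is:

import tensorflow as tf

# Only the 3rd and 5th positions contribute to the loss; -100 marks ignored positions.
labels = tf.constant([[-100, -100, 1037, -100, 2001, -100]])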
@@ -1039,31 +994,13 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         sequence_output = outputs[0]
-        prediction_scores = self.predictions(hidden_states=sequence_output, training=inputs["training"])
-        loss = (
-            None
-            if inputs["labels"] is None
-            else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
-        )
+        prediction_scores = self.predictions(hidden_states=sequence_output, training=training)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=prediction_scores)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
@@ -1106,6 +1043,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -1134,9 +1072,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
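Per the docstring above, the sequence-classification head switches between a regression loss and a classification loss depending on `config.num_labels`. A hedged sketch of the two label shapes this implies (values invented for illustration) is:

import tensorflow as tf

regression_labels = tf.constant([[0.7], [1.3]])  # num_labels == 1: float targets, mean-square loss
classification_labels = tf.constant([2, 0])      # num_labels > 1: class indices, cross-entropy loss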
@@ -1146,28 +1082,14 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         pooled_output = outputs[1]
-        pooled_output = self.dropout(inputs=pooled_output, training=inputs["training"])
+        pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=pooled_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
@@ -1215,6 +1137,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -1241,9 +1164,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -1253,28 +1174,14 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=return_dict,
-            training=inputs["training"],
         )
         sequence_output = outputs[0]
-        sequence_output = self.dropout(inputs=sequence_output, training=inputs["training"])
+        sequence_output = self.dropout(inputs=sequence_output, training=training)
         logits = self.classifier(inputs=sequence_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
@@ -1315,6 +1222,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -1348,9 +1256,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
             Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
             are not taken into account for computing the loss.
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -1360,22 +1266,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            start_positions=start_positions,
-            end_positions=end_positions,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.albert(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         sequence_output = outputs[0]
         logits = self.qa_outputs(inputs=sequence_output)
@@ -1384,12 +1275,12 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
         end_logits = tf.squeeze(input=end_logits, axis=-1)
         loss = None

-        if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
-            labels = {"start_position": inputs["start_positions"]}
-            labels["end_position"] = inputs["end_positions"]
+        if start_positions is not None and end_positions is not None:
+            labels = {"start_position": start_positions}
+            labels["end_position"] = end_positions
             loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (start_logits, end_logits) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
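For reference, the `start_logits`/`end_logits` used above come from splitting the `qa_outputs` projection along the last axis and squeezing that axis away. A small standalone sketch of this step, using dummy tensors rather than the exact surrounding model code, is:

import tensorflow as tf

batch_size, seq_length = 2, 7  # hypothetical shapes
logits = tf.random.normal((batch_size, seq_length, 2))  # stand-in for self.qa_outputs(...)
start_logits, end_logits = tf.split(value=logits, num_or_size_splits=2, axis=-1)
start_logits = tf.squeeze(input=start_logits, axis=-1)  # (batch_size, seq_length)
end_logits = tf.squeeze(input=end_logits, axis=-1)      # (batch_size, seq_length)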
@@ -1443,6 +1334,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
@@ -1470,47 +1362,27 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
             where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
         """
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            labels=labels,
-            training=training,
-            kwargs_call=kwargs,
-        )
-
-        if inputs["input_ids"] is not None:
-            num_choices = shape_list(inputs["input_ids"])[1]
-            seq_length = shape_list(inputs["input_ids"])[2]
+        if input_ids is not None:
+            num_choices = shape_list(input_ids)[1]
+            seq_length = shape_list(input_ids)[2]
         else:
-            num_choices = shape_list(inputs["inputs_embeds"])[1]
-            seq_length = shape_list(inputs["inputs_embeds"])[2]
+            num_choices = shape_list(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[2]

-        flat_input_ids = tf.reshape(inputs["input_ids"], (-1, seq_length)) if inputs["input_ids"] is not None else None
+        flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
         flat_attention_mask = (
-            tf.reshape(tensor=inputs["attention_mask"], shape=(-1, seq_length))
-            if inputs["attention_mask"] is not None
-            else None
+            tf.reshape(tensor=attention_mask, shape=(-1, seq_length)) if attention_mask is not None else None
         )
         flat_token_type_ids = (
-            tf.reshape(tensor=inputs["token_type_ids"], shape=(-1, seq_length))
-            if inputs["token_type_ids"] is not None
-            else None
+            tf.reshape(tensor=token_type_ids, shape=(-1, seq_length)) if token_type_ids is not None else None
         )
         flat_position_ids = (
             tf.reshape(tensor=position_ids, shape=(-1, seq_length)) if position_ids is not None else None
         )
         flat_inputs_embeds = (
-            tf.reshape(tensor=inputs["inputs_embeds"], shape=(-1, seq_length, shape_list(inputs["inputs_embeds"])[3]))
-            if inputs["inputs_embeds"] is not None
+            tf.reshape(tensor=inputs_embeds, shape=(-1, seq_length, shape_list(inputs_embeds)[3]))
+            if inputs_embeds is not None
             else None
         )
         outputs = self.albert(
@@ -1518,22 +1390,20 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
             attention_mask=flat_attention_mask,
             token_type_ids=flat_token_type_ids,
             position_ids=flat_position_ids,
-            head_mask=inputs["head_mask"],
+            head_mask=head_mask,
             inputs_embeds=flat_inputs_embeds,
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )
         pooled_output = outputs[1]
-        pooled_output = self.dropout(inputs=pooled_output, training=inputs["training"])
+        pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=pooled_output)
         reshaped_logits = tf.reshape(tensor=logits, shape=(-1, num_choices))
-        loss = (
-            None
-            if inputs["labels"] is None
-            else self.hf_compute_loss(labels=inputs["labels"], logits=reshaped_logits)
-        )
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=reshaped_logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (reshaped_logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
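The multiple-choice head above flattens `(batch_size, num_choices, seq_length)` inputs into `(batch_size * num_choices, seq_length)` before the shared encoder and then reshapes the classifier logits back to one score per choice. A standalone sketch of that reshaping, using dummy tensors only, is:

import tensorflow as tf

batch_size, num_choices, seq_length = 2, 4, 6  # hypothetical shapes
input_ids = tf.zeros((batch_size, num_choices, seq_length), dtype=tf.int32)

flat_input_ids = tf.reshape(input_ids, (-1, seq_length))              # (8, 6): one row per choice
logits = tf.zeros((batch_size * num_choices, 1))                       # stand-in for self.classifier(...)
reshaped_logits = tf.reshape(tensor=logits, shape=(-1, num_choices))   # (2, 4): one score per choice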