chenpangpang / transformers · Commits

Unverified commit 79465ac5, authored Mar 15, 2022 by Kamal Raj, committed by GitHub on Mar 15, 2022
TF clearer model variable naming: Tapas (#16145)
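This commit drops the manual inputs = input_processing(...) repacking from each TAPAS call method and applies the @unpack_inputs decorator instead, so method bodies read plain parameter names such as input_ids rather than inputs["input_ids"], and the task heads forward their arguments to self.tapas(...) directly. A minimal illustrative sketch of the decorator pattern follows the changed-file summary below.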
parent a78565b7
Showing 1 changed file with 72 additions and 158 deletions
src/transformers/models/tapas/modeling_tf_tapas.py (+72, -158, view file @ 79465ac5)
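Before the diff, here is a minimal sketch of the decorator pattern this commit adopts. It is an illustrative stand-in under stated assumptions, not the real unpack_inputs from transformers.modeling_tf_utils (the real decorator also resolves dict/tuple first arguments and config-driven defaults such as return_dict); the names unpack_inputs_sketch and DemoLayer are hypothetical.

import functools
import inspect


def unpack_inputs_sketch(func):
    # Hypothetical, simplified stand-in for transformers' unpack_inputs.
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Bind positional arguments to their parameter names and apply the
        # declared defaults, so the decorated body can read plain names like
        # input_ids instead of inputs["input_ids"].
        bound = inspect.signature(func).bind(self, *args, **kwargs)
        bound.apply_defaults()
        unpacked = dict(bound.arguments)
        unpacked.pop("self")
        return func(self, **unpacked)

    return wrapper


class DemoLayer:
    @unpack_inputs_sketch
    def call(self, input_ids=None, attention_mask=None, training=False):
        # After unpacking, every argument is an ordinary local parameter.
        return {"input_ids": input_ids, "attention_mask": attention_mask, "training": training}


print(DemoLayer().call([101, 2054, 102], training=True))
# {'input_ids': [101, 2054, 102], 'attention_mask': None, 'training': True}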
@@ -43,8 +43,8 @@ from ...modeling_tf_utils import (
     TFPreTrainedModel,
     TFSequenceClassificationLoss,
     get_initializer,
-    input_processing,
     keras_serializable,
+    unpack_inputs,
 )
 from ...tf_utils import shape_list
 from ...utils import logging
@@ -757,6 +757,7 @@ class TFTapasMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError

+    @unpack_inputs
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
@@ -771,43 +772,28 @@ class TFTapasMainLayer(tf.keras.layers.Layer):
         training: bool = False,
         **kwargs,
     ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-            training=training,
-            kwargs_call=kwargs,
-        )
-        if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
+        if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
-        elif inputs["inputs_embeds"] is not None:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+        elif input_ids is not None:
+            input_shape = shape_list(input_ids)
+        elif inputs_embeds is not None:
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.fill(dims=input_shape, value=1)
+        if attention_mask is None:
+            attention_mask = tf.fill(dims=input_shape, value=1)

-        if inputs["token_type_ids"] is None:
-            inputs["token_type_ids"] = tf.fill(dims=input_shape + [len(self.config.type_vocab_sizes)], value=0)
+        if token_type_ids is None:
+            token_type_ids = tf.fill(dims=input_shape + [len(self.config.type_vocab_sizes)], value=0)

         embedding_output = self.embeddings(
-            input_ids=inputs["input_ids"],
-            position_ids=inputs["position_ids"],
-            token_type_ids=inputs["token_type_ids"],
-            inputs_embeds=inputs["inputs_embeds"],
-            training=inputs["training"],
+            input_ids=input_ids,
+            position_ids=position_ids,
+            token_type_ids=token_type_ids,
+            inputs_embeds=inputs_embeds,
+            training=training,
         )

         # We create a 3D attention mask from a 2D tensor mask.
@@ -815,7 +801,7 @@ class TFTapasMainLayer(tf.keras.layers.Layer):
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # this attention mask is more simple than the triangular masking of causal attention
         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = tf.reshape(inputs["attention_mask"], (input_shape[0], 1, 1, input_shape[1]))
+        extended_attention_mask = tf.reshape(attention_mask, (input_shape[0], 1, 1, input_shape[1]))

         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
         # masked positions, this operation will create a tensor which is 0.0 for
@@ -832,29 +818,29 @@ class TFTapasMainLayer(tf.keras.layers.Layer):
         # attention_probs has shape bsz x n_heads x N x N
         # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
         # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
-        if inputs["head_mask"] is not None:
+        if head_mask is not None:
             raise NotImplementedError
         else:
-            inputs["head_mask"] = [None] * self.config.num_hidden_layers
+            head_mask = [None] * self.config.num_hidden_layers

         encoder_outputs = self.encoder(
             hidden_states=embedding_output,
             attention_mask=extended_attention_mask,
-            head_mask=inputs["head_mask"],
+            head_mask=head_mask,
             encoder_hidden_states=None,
             encoder_attention_mask=None,
             past_key_values=None,
             use_cache=None,
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
         )

         sequence_output = encoder_outputs[0]
         pooled_output = self.pooler(hidden_states=sequence_output) if self.pooler is not None else None

-        if not inputs["return_dict"]:
+        if not return_dict:
             return (
                 sequence_output,
                 pooled_output,
@@ -979,6 +965,7 @@ class TFTapasModel(TFTapasPreTrainedModel):
         self.tapas = TFTapasMainLayer(config, name="tapas")

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(TAPAS_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
     def call(
@@ -1020,9 +1007,7 @@ class TFTapasModel(TFTapasPreTrainedModel):
         >>> last_hidden_states = outputs.last_hidden_state
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.tapas(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -1033,19 +1018,6 @@ class TFTapasModel(TFTapasPreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.tapas(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )

         return outputs
@@ -1079,6 +1051,7 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
     def get_lm_head(self) -> tf.keras.layers.Layer:
         return self.lm_head.predictions

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(TAPAS_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(
@@ -1130,9 +1103,7 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
         >>> outputs = model(**inputs, labels=labels)
         >>> logits = outputs.logits
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.tapas(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -1142,31 +1113,13 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.tapas(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         sequence_output = outputs[0]
         prediction_scores = self.lm_head(sequence_output)
-        loss = (
-            None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
-        )
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=prediction_scores)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (prediction_scores,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
@@ -1311,6 +1264,7 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         )
         self.config = config

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(TAPAS_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFTableQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC)
     def call(
@@ -1385,38 +1339,17 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         >>> logits_aggregation = outputs.logits_aggregation
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.tapas(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
             position_ids=position_ids,
             head_mask=head_mask,
             inputs_embeds=inputs_embeds,
-            table_mask=table_mask,
-            aggregation_labels=aggregation_labels,
-            float_answer=float_answer,
-            numeric_values=numeric_values,
-            numeric_values_scale=numeric_values_scale,
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.tapas(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         sequence_output = outputs[0]
@@ -1424,14 +1357,14 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         sequence_output = self.dropout(sequence_output)

-        if inputs["input_ids"] is not None:
-            input_shape = shape_list(inputs["input_ids"])
+        if input_ids is not None:
+            input_shape = shape_list(input_ids)
         else:
-            input_shape = shape_list(inputs["inputs_embeds"])[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]

         # Construct indices for the table.
-        if inputs["token_type_ids"] is None:
-            inputs["token_type_ids"] = tf.fill(input_shape + [len(self.config.type_vocab_sizes)], 0)
+        if token_type_ids is None:
+            token_type_ids = tf.fill(input_shape + [len(self.config.type_vocab_sizes)], 0)
         token_types = [
             "segment_ids",
@@ -1443,8 +1376,8 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
             "numeric_relations",
         ]
-        row_ids = inputs["token_type_ids"][:, :, token_types.index("row_ids")]
-        column_ids = inputs["token_type_ids"][:, :, token_types.index("column_ids")]
+        row_ids = token_type_ids[:, :, token_types.index("row_ids")]
+        column_ids = token_type_ids[:, :, token_types.index("column_ids")]

         # Construct indices for the table.
         row_index = IndexMap(
@@ -1460,19 +1393,15 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         cell_index = ProductIndexMap(row_index, col_index)

         # Masks.
-        input_shape = (
-            shape_list(inputs["input_ids"]) if inputs["input_ids"] is not None else shape_list(inputs["inputs_embeds"])[:-1]
-        )
-        if inputs["attention_mask"] is None:
-            inputs["attention_mask"] = tf.ones(input_shape)
+        input_shape = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:-1]
+        if attention_mask is None:
+            attention_mask = tf.ones(input_shape)
         # Table cells only, without question tokens and table headers.
-        if inputs["table_mask"] is None:
-            inputs["table_mask"] = tf.where(row_ids > 0, tf.ones_like(row_ids), tf.zeros_like(row_ids))
+        if table_mask is None:
+            table_mask = tf.where(row_ids > 0, tf.ones_like(row_ids), tf.zeros_like(row_ids))
         # <float32>[batch_size, seq_length]
-        input_mask_float = tf.cast(inputs["attention_mask"], tf.float32)
-        table_mask_float = tf.cast(inputs["table_mask"], tf.float32)
+        input_mask_float = tf.cast(attention_mask, tf.float32)
+        table_mask_float = tf.cast(table_mask, tf.float32)
         # Mask for cells that exist in the table (i.e. that are not padding).
         cell_mask, _ = reduce_mean(input_mask_float, cell_index)
@@ -1495,7 +1424,7 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         # Total loss calculation
         total_loss = 0.0
         calculate_loss = False
-        if inputs["labels"] is not None:
+        if labels is not None:
             calculate_loss = True
             is_supervised = not self.config.num_aggregation_labels > 0 or not self.config.use_answer_as_supervision
@@ -1509,16 +1438,16 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
             if is_supervised:
                 aggregate_mask = None
             else:
-                if inputs["float_answer"] is not None:
+                if float_answer is not None:
                     assert (
-                        shape_list(inputs["labels"])[0] == shape_list(inputs["float_answer"])[0]
+                        shape_list(labels)[0] == shape_list(float_answer)[0]
                     ), "Make sure the answers are a FloatTensor of shape (batch_size,)"
                     # <float32>[batch_size]
                     aggregate_mask = _calculate_aggregate_mask(
-                        inputs["float_answer"],
+                        float_answer,
                         pooled_output,
                         self.config.cell_selection_preference,
-                        inputs["labels"],
+                        labels,
                         self.aggregation_classifier,
                     )
                 else:
@@ -1535,17 +1464,17 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
             selection_loss_per_example = None
             if not self.config.select_one_column:
                 weight = tf.where(
-                    inputs["labels"] == 0,
-                    tf.ones_like(inputs["labels"], dtype=tf.float32),
-                    self.config.positive_label_weight * tf.ones_like(inputs["labels"], dtype=tf.float32),
+                    labels == 0,
+                    tf.ones_like(labels, dtype=tf.float32),
+                    self.config.positive_label_weight * tf.ones_like(labels, dtype=tf.float32),
                 )
-                selection_loss_per_token = -dist_per_token.log_prob(inputs["labels"]) * weight
+                selection_loss_per_token = -dist_per_token.log_prob(labels) * weight
                 selection_loss_per_example = tf.reduce_sum(selection_loss_per_token * input_mask_float, axis=1) / (
                     tf.reduce_sum(input_mask_float, axis=1) + EPSILON_ZERO_DIVISION
                 )
             else:
                 selection_loss_per_example, logits = _single_column_cell_selection_loss(
-                    logits, column_logits, inputs["labels"], cell_index, col_index, cell_mask
+                    logits, column_logits, labels, cell_index, col_index, cell_mask
                 )
                 dist_per_token = tfp.distributions.Bernoulli(logits=logits)
@@ -1562,14 +1491,14 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
         if self.config.num_aggregation_labels > 0:
             if is_supervised:
                 # Note that `aggregate_mask` is None if the setting is supervised.
-                if inputs["aggregation_labels"] is not None:
+                if aggregation_labels is not None:
                     assert (
-                        shape_list(inputs["labels"])[0] == shape_list(inputs["aggregation_labels"])[0]
+                        shape_list(labels)[0] == shape_list(aggregation_labels)[0]
                     ), "Make sure the aggregation labels are a LongTensor of shape (batch_size,)"
                     per_example_additional_loss = _calculate_aggregation_loss(
                         logits_aggregation,
                         aggregate_mask,
-                        inputs["aggregation_labels"],
+                        aggregation_labels,
                         self.config.use_answer_as_supervision,
                         self.config.num_aggregation_labels,
                         self.config.aggregation_loss_weight,
@@ -1579,7 +1508,7 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
                         "You have to specify aggregation labels in order to calculate the aggregation loss"
                     )
                 else:
-                    aggregation_labels = tf.zeros(shape_list(inputs["labels"])[0], dtype=tf.int32)
+                    aggregation_labels = tf.zeros(shape_list(labels)[0], dtype=tf.int32)
                     per_example_additional_loss = _calculate_aggregation_loss(
                         logits_aggregation,
                         aggregate_mask,
@@ -1590,15 +1519,15 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
                 )

             if self.config.use_answer_as_supervision:
-                if inputs["numeric_values"] is not None and inputs["numeric_values_scale"] is not None:
-                    assert shape_list(inputs["numeric_values"]) == shape_list(inputs["numeric_values_scale"])
+                if numeric_values is not None and numeric_values_scale is not None:
+                    assert shape_list(numeric_values) == shape_list(numeric_values_scale)
                     # Add regression loss for numeric answers which require aggregation.
                     answer_loss, large_answer_loss_mask = _calculate_regression_loss(
-                        inputs["float_answer"],
+                        float_answer,
                         aggregate_mask,
                         dist_per_token,
-                        inputs["numeric_values"],
-                        inputs["numeric_values_scale"],
+                        numeric_values,
+                        numeric_values_scale,
                         table_mask_float,
                         logits_aggregation,
                         self.config,
@@ -1618,7 +1547,7 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
                 _, logits = _single_column_cell_selection_loss(
                     logits, column_logits, labels, cell_index, col_index, cell_mask
                 )
-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits, logits_aggregation) + outputs[2:]
             return ((total_loss,) + output) if calculate_loss else output
@@ -1657,6 +1586,7 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

+    @unpack_inputs
     @add_start_docstrings_to_model_forward(TAPAS_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def call(
@@ -1712,9 +1642,7 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
         >>> logits = outputs.logits
         ```"""
-        inputs = input_processing(
-            func=self.call,
-            config=self.config,
+        outputs = self.tapas(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
@@ -1724,28 +1652,14 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            labels=labels,
             training=training,
-            kwargs_call=kwargs,
-        )
-        outputs = self.tapas(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            token_type_ids=inputs["token_type_ids"],
-            position_ids=inputs["position_ids"],
-            head_mask=inputs["head_mask"],
-            inputs_embeds=inputs["inputs_embeds"],
-            output_attentions=inputs["output_attentions"],
-            output_hidden_states=inputs["output_hidden_states"],
-            return_dict=inputs["return_dict"],
-            training=inputs["training"],
         )
         pooled_output = outputs[1]
-        pooled_output = self.dropout(inputs=pooled_output, training=inputs["training"])
+        pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=pooled_output)
-        loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
+        loss = None if labels is None else self.hf_compute_loss(labels=labels, logits=logits)

-        if not inputs["return_dict"]:
+        if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
...
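Note on the net effect: call sites are unchanged, since the call signatures keep the same parameter names and defaults; the refactor only moves argument normalization out of each method body and into the shared @unpack_inputs decorator, which is how the file nets 158 deletions against 72 additions.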