Commit 7b262b96 (unverified)
Authored Mar 22, 2022 by Adam Montgomerie; committed by GitHub, Mar 22, 2022

Funnel type hints (#16323)

* add pt funnel type hints
* add tf funnel type hints
Parent: deb61e5f
Showing 2 changed files with 254 additions and 219 deletions (+254, −219).
src/transformers/models/funnel/modeling_funnel.py     (+162, −129)
src/transformers/models/funnel/modeling_tf_funnel.py  (+92, −90)
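The same annotation pattern runs through both files: arguments that default to `None` become `Optional[...]`, framework tensor types are spelled out, and return types are made explicit. As a rough illustration of the PyTorch-side pattern (a standalone sketch with made-up names, not code from this commit):

from typing import Optional

import torch
from torch import nn


class TypedBlock(nn.Module):
    # Hypothetical module, shown only to illustrate the pattern this commit
    # applies: None-defaulted arguments gain Optional[...] hints and every
    # method gets an explicit return type.
    def __init__(self, hidden_size: int = 8) -> None:
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        out = self.proj(hidden)
        if mask is not None:
            out = out * mask.unsqueeze(-1)  # zero out padded positions
        return out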
src/transformers/models/funnel/modeling_funnel.py

@@ -16,7 +16,7 @@
 import os
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -157,13 +157,15 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path):
 class FunnelEmbeddings(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
         self.layer_norm = nn.LayerNorm(config.d_model, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout)
 
-    def forward(self, input_ids=None, inputs_embeds=None):
+    def forward(
+        self, input_ids: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
         if inputs_embeds is None:
             inputs_embeds = self.word_embeddings(input_ids)
         embeddings = self.layer_norm(inputs_embeds)
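`FunnelEmbeddings.forward` keeps both inputs `Optional` because callers supply either `input_ids` or a precomputed `inputs_embeds`. A quick check of both call paths (a sketch assuming `transformers` and `torch` are installed; the tiny config values are arbitrary):

import torch
from transformers import FunnelConfig, FunnelModel

config = FunnelConfig(vocab_size=100, block_sizes=[1, 1, 1], d_model=32,
                      n_head=2, d_head=16, d_inner=64, num_decoder_layers=1)
model = FunnelModel(config)

ids = torch.randint(0, 100, (1, 8))
by_ids = model(input_ids=ids)                                       # lookup done internally
by_embeds = model(inputs_embeds=model.get_input_embeddings()(ids))  # lookup skipped
print(by_ids.last_hidden_state.shape, by_embeds.last_hidden_state.shape)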
@@ -178,7 +180,7 @@ class FunnelAttentionStructure(nn.Module):
     cls_token_type_id: int = 2
 
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.sin_dropout = nn.Dropout(config.hidden_dropout)
@@ -187,7 +189,12 @@ class FunnelAttentionStructure(nn.Module):
         # divided.
         self.pooling_mult = None
 
-    def init_attention_inputs(self, inputs_embeds, attention_mask=None, token_type_ids=None):
+    def init_attention_inputs(
+        self,
+        inputs_embeds: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor]:
         """Returns the attention inputs associated to the inputs of the model."""
         # inputs_embeds has shape batch_size x seq_len x d_model
         # attention_mask and token_type_ids have shape batch_size x seq_len
@@ -202,7 +209,7 @@ class FunnelAttentionStructure(nn.Module):
         )
         return (position_embeds, token_type_mat, attention_mask, cls_mask)
 
-    def token_type_ids_to_mat(self, token_type_ids):
+    def token_type_ids_to_mat(self, token_type_ids: torch.Tensor) -> torch.Tensor:
         """Convert `token_type_ids` to `token_type_mat`."""
         token_type_mat = token_type_ids[:, :, None] == token_type_ids[:, None]
         # Treat <cls> as in the same segment as both A & B
@@ -210,7 +217,9 @@ class FunnelAttentionStructure(nn.Module):
         cls_mat = cls_ids[:, :, None] | cls_ids[:, None]
         return cls_mat | token_type_mat
 
-    def get_position_embeds(self, seq_len, dtype, device):
+    def get_position_embeds(
+        self, seq_len: int, dtype: torch.dtype, device: torch.device
+    ) -> Union[Tuple[torch.Tensor], List[List[torch.Tensor]]]:
         """
         Create and cache inputs related to relative position encoding. Those are very different depending on whether we
         are using the factorized or the relative shift attention:
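The `Union` return annotation here is doing real work: the cached position inputs have one structure for one attention variant and another for the other, as the docstring notes. A shape-only sketch of such a union-returning helper (illustrative only, not Funnel's actual math):

from typing import List, Tuple, Union

import torch


def position_cache(factorized: bool, seq_len: int, d: int) -> Union[Tuple[torch.Tensor, ...], List[List[torch.Tensor]]]:
    # Illustrative: mirrors the shape of the annotation, not the real encoding.
    inv = 10000 ** (torch.arange(0, d, 2, dtype=torch.float) / d)
    freqs = torch.arange(seq_len, dtype=torch.float)[:, None] / inv
    if factorized:
        return (torch.sin(freqs), torch.cos(freqs))    # flat tuple of tensors
    return [[torch.sin(freqs), torch.cos(freqs)]]      # nested lists of tensors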
@@ -288,7 +297,7 @@ class FunnelAttentionStructure(nn.Module):
             position_embeds_list.append([position_embeds_no_pooling, position_embeds_pooling])
         return position_embeds_list
 
-    def stride_pool_pos(self, pos_id, block_index):
+    def stride_pool_pos(self, pos_id: torch.Tensor, block_index: int):
         """
         Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
         """
@@ -303,7 +312,7 @@ class FunnelAttentionStructure(nn.Module):
         else:
             return pos_id[::2]
 
-    def relative_pos(self, pos, stride, pooled_pos=None, shift=1):
+    def relative_pos(self, pos: torch.Tensor, stride: int, pooled_pos=None, shift: int = 1) -> torch.Tensor:
         """
         Build the relative positional vector between `pos` and `pooled_pos`.
         """
@@ -317,7 +326,11 @@ class FunnelAttentionStructure(nn.Module):
 
         return torch.arange(max_dist, min_dist - 1, -stride, dtype=torch.long, device=pos.device)
 
-    def stride_pool(self, tensor, axis):
+    def stride_pool(
+        self,
+        tensor: Union[torch.Tensor, Tuple[torch.Tensor], List[torch.Tensor]],
+        axis: Union[int, Tuple[int], List[int]],
+    ) -> torch.Tensor:
         """
         Perform pooling by stride slicing the tensor along the given axis.
         """
@@ -346,7 +359,9 @@ class FunnelAttentionStructure(nn.Module):
             tensor = torch.cat([tensor[cls_slice], tensor], axis=axis)
         return tensor[enc_slice]
 
-    def pool_tensor(self, tensor, mode="mean", stride=2):
+    def pool_tensor(
+        self, tensor: Union[torch.Tensor, Tuple[torch.Tensor], List[torch.Tensor]], mode: str = "mean", stride: int = 2
+    ) -> torch.Tensor:
         """Apply 1D pooling to a tensor of size [B x T (x H)]."""
         if tensor is None:
             return None
@@ -382,7 +397,9 @@ class FunnelAttentionStructure(nn.Module):
                 return tensor[:, 0]
         return tensor
 
-    def pre_attention_pooling(self, output, attention_inputs):
+    def pre_attention_pooling(
+        self, output, attention_inputs: Tuple[torch.Tensor]
+    ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
         """Pool `output` and the proper parts of `attention_inputs` before the attention layer."""
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
         if self.config.pool_q_only:
@@ -402,7 +419,7 @@ class FunnelAttentionStructure(nn.Module):
         attention_inputs = (position_embeds, token_type_mat, attention_mask, cls_mask)
         return output, attention_inputs
 
-    def post_attention_pooling(self, attention_inputs):
+    def post_attention_pooling(self, attention_inputs: Tuple[torch.Tensor]) -> Tuple[torch.Tensor]:
         """Pool the proper parts of `attention_inputs` after the attention layer."""
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
         if self.config.pool_q_only:
@@ -416,7 +433,7 @@ class FunnelAttentionStructure(nn.Module):
         return attention_inputs
 
 
-def _relative_shift_gather(positional_attn, context_len, shift):
+def _relative_shift_gather(positional_attn: torch.Tensor, context_len: int, shift: int) -> torch.Tensor:
     batch_size, n_head, seq_len, max_rel_len = positional_attn.shape
     # max_rel_len = 2 * context_len + shift -1 is the numbers of possible relative positions i-j
@@ -433,7 +450,7 @@ def _relative_shift_gather(positional_attn, context_len, shift):
 
 class FunnelRelMultiheadAttention(nn.Module):
-    def __init__(self, config, block_index):
+    def __init__(self, config: FunnelConfig, block_index: int) -> None:
         super().__init__()
         self.config = config
         self.block_index = block_index
@@ -522,7 +539,14 @@ class FunnelRelMultiheadAttention(nn.Module):
             token_type_attn *= cls_mask
         return token_type_attn
 
-    def forward(self, query, key, value, attention_inputs, output_attentions=False):
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        attention_inputs: Tuple[torch.Tensor],
+        output_attentions: bool = False,
+    ) -> Tuple[torch.Tensor, ...]:
         # query has shape batch_size x seq_len x d_model
         # key and value have shapes batch_size x context_len x d_model
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
@@ -570,7 +594,7 @@ class FunnelRelMultiheadAttention(nn.Module):
 
 class FunnelPositionwiseFFN(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.linear_1 = nn.Linear(config.d_model, config.d_inner)
         self.activation_function = ACT2FN[config.hidden_act]
@@ -579,7 +603,7 @@ class FunnelPositionwiseFFN(nn.Module):
         self.dropout = nn.Dropout(config.hidden_dropout)
         self.layer_norm = nn.LayerNorm(config.d_model, config.layer_norm_eps)
 
-    def forward(self, hidden):
+    def forward(self, hidden: torch.Tensor) -> torch.Tensor:
         h = self.linear_1(hidden)
         h = self.activation_function(h)
         h = self.activation_dropout(h)
@@ -589,19 +613,26 @@ class FunnelPositionwiseFFN(nn.Module):
 
 class FunnelLayer(nn.Module):
-    def __init__(self, config, block_index):
+    def __init__(self, config: FunnelConfig, block_index: int) -> None:
         super().__init__()
         self.attention = FunnelRelMultiheadAttention(config, block_index)
         self.ffn = FunnelPositionwiseFFN(config)
 
-    def forward(self, query, key, value, attention_inputs, output_attentions=False):
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        attention_inputs,
+        output_attentions: bool = False,
+    ) -> Tuple:
         attn = self.attention(query, key, value, attention_inputs, output_attentions=output_attentions)
         output = self.ffn(attn[0])
         return (output, attn[1]) if output_attentions else (output,)
 
 
 class FunnelEncoder(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.attention_structure = FunnelAttentionStructure(config)
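`FunnelLayer.forward` is annotated with a bare `Tuple` because its arity is decided at runtime: a 1-tuple normally, a 2-tuple when `output_attentions=True`. The contract in isolation (a minimal sketch):

from typing import Tuple

import torch


def layer_outputs(output: torch.Tensor, attn_probs: torch.Tensor, output_attentions: bool = False) -> Tuple[torch.Tensor, ...]:
    # Same shape of contract as FunnelLayer.forward: the attention
    # probabilities are only included in the tuple when requested.
    return (output, attn_probs) if output_attentions else (output,)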
@@ -614,13 +645,13 @@ class FunnelEncoder(nn.Module):
     def forward(
         self,
-        inputs_embeds,
-        attention_mask=None,
-        token_type_ids=None,
-        output_attentions=False,
-        output_hidden_states=False,
-        return_dict=True,
-    ):
+        inputs_embeds: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        output_attentions: bool = False,
+        output_hidden_states: bool = False,
+        return_dict: bool = True,
+    ) -> Union[Tuple, BaseModelOutput]:
         # The pooling is not implemented on long tensors, so we convert this mask.
         attention_mask = attention_mask.type_as(inputs_embeds)
         attention_inputs = self.attention_structure.init_attention_inputs(
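The new `Union[Tuple, BaseModelOutput]` return annotations mirror the `return_dict` switch: the same fields come back either as a plain tuple or as a model-output object that also supports index access. For instance (a small sketch assuming `transformers` is installed):

import torch
from transformers.modeling_outputs import BaseModelOutput

out = BaseModelOutput(last_hidden_state=torch.zeros(1, 8, 32))
assert out[0] is out.last_hidden_state             # index access works like the tuple form
assert out.to_tuple()[0] is out.last_hidden_state  # explicit conversion to the tuple form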
@@ -663,7 +694,9 @@ class FunnelEncoder(nn.Module):
         return BaseModelOutput(last_hidden_state=hidden, hidden_states=all_hidden_states, attentions=all_attentions)
 
 
-def upsample(x, stride, target_len, separate_cls=True, truncate_seq=False):
+def upsample(
+    x: torch.Tensor, stride: int, target_len: int, separate_cls: bool = True, truncate_seq: bool = False
+) -> torch.Tensor:
     """
     Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
     """
@@ -684,7 +717,7 @@ def upsample(x, stride, target_len, separate_cls=True, truncate_seq=False):
 
 class FunnelDecoder(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.attention_structure = FunnelAttentionStructure(config)
@@ -692,14 +725,14 @@ class FunnelDecoder(nn.Module):
     def forward(
         self,
-        final_hidden,
-        first_block_hidden,
-        attention_mask=None,
-        token_type_ids=None,
-        output_attentions=False,
-        output_hidden_states=False,
-        return_dict=True,
-    ):
+        final_hidden: torch.Tensor,
+        first_block_hidden: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        output_attentions: bool = False,
+        output_hidden_states: bool = False,
+        return_dict: bool = True,
+    ) -> Union[Tuple, BaseModelOutput]:
         upsampled_hidden = upsample(
             final_hidden,
             stride=2 ** (len(self.config.block_sizes) - 1),
@@ -735,13 +768,13 @@ class FunnelDecoder(nn.Module):
 
 class FunnelDiscriminatorPredictions(nn.Module):
     """Prediction module for the discriminator, made up of two dense layers."""
 
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.dense = nn.Linear(config.d_model, config.d_model)
         self.dense_prediction = nn.Linear(config.d_model, 1)
 
-    def forward(self, discriminator_hidden_states):
+    def forward(self, discriminator_hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = ACT2FN[self.config.hidden_act](hidden_states)
         logits = self.dense_prediction(hidden_states).squeeze()
@@ -784,13 +817,13 @@ class FunnelPreTrainedModel(PreTrainedModel):
 
 class FunnelClassificationHead(nn.Module):
-    def __init__(self, config, n_labels):
+    def __init__(self, config: FunnelConfig, n_labels: int) -> None:
         super().__init__()
         self.linear_hidden = nn.Linear(config.d_model, config.d_model)
         self.dropout = nn.Dropout(config.hidden_dropout)
         self.linear_out = nn.Linear(config.d_model, n_labels)
 
-    def forward(self, hidden):
+    def forward(self, hidden: torch.Tensor) -> torch.Tensor:
         hidden = self.linear_hidden(hidden)
         hidden = torch.tanh(hidden)
         hidden = self.dropout(hidden)
@@ -892,7 +925,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
     FUNNEL_START_DOCSTRING,
 )
 class FunnelBaseModel(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
 
         self.embeddings = FunnelEmbeddings(config)
@@ -901,10 +934,10 @@ class FunnelBaseModel(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    def get_input_embeddings(self):
+    def get_input_embeddings(self) -> nn.Embedding:
         return self.embeddings.word_embeddings
 
-    def set_input_embeddings(self, new_embeddings):
+    def set_input_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.embeddings.word_embeddings = new_embeddings
 
     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -916,16 +949,16 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -969,7 +1002,7 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelModel(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.config = config
         self.embeddings = FunnelEmbeddings(config)
@@ -979,10 +1012,10 @@ class FunnelModel(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    def get_input_embeddings(self):
+    def get_input_embeddings(self) -> nn.Embedding:
         return self.embeddings.word_embeddings
 
-    def set_input_embeddings(self, new_embeddings):
+    def set_input_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.embeddings.word_embeddings = new_embeddings
 
     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -994,14 +1027,14 @@ class FunnelModel(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1077,7 +1110,7 @@ add_start_docstrings(
 )
 class FunnelForPreTraining(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
 
         self.funnel = FunnelModel(config)
@@ -1089,15 +1122,15 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
     @replace_return_docstrings(output_type=FunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, FunnelForPreTrainingOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
@@ -1160,7 +1193,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
 
 @add_start_docstrings("""Funnel Transformer Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
 class FunnelForMaskedLM(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
 
         self.funnel = FunnelModel(config)
@@ -1169,10 +1202,10 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    def get_output_embeddings(self):
+    def get_output_embeddings(self) -> nn.Linear:
         return self.lm_head
 
-    def set_output_embeddings(self, new_embeddings):
+    def set_output_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.lm_head = new_embeddings
 
     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1185,15 +1218,15 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MaskedLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
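The `[-100, 0, ...]` range in the `labels` docstring works because `-100` is the default `ignore_index` of PyTorch's `CrossEntropyLoss`: only masked positions carry real token ids, everything else is excluded from the loss. Building such labels (a sketch):

import torch

input_ids = torch.tensor([[5, 17, 3, 42]])
masked = torch.tensor([[False, True, False, True]])
labels = input_ids.clone()
labels[~masked] = -100      # unmasked positions are ignored by the loss
print(labels)               # tensor([[-100,   17, -100,   42]])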
@@ -1240,7 +1273,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForSequenceClassification(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
         self.config = config
@@ -1259,15 +1292,15 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1333,7 +1366,7 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForMultipleChoice(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
 
         self.funnel = FunnelBaseModel(config)
@@ -1350,15 +1383,15 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MultipleChoiceModelOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
@@ -1417,7 +1450,7 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForTokenClassification(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
@@ -1437,15 +1470,15 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1491,7 +1524,7 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForQuestionAnswering(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
@@ -1510,16 +1543,16 @@ class FunnelForQuestionAnswering(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        start_positions=None,
-        end_positions=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        start_positions: Optional[torch.Tensor] = None,
+        end_positions: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
         r"""
         start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
src/transformers/models/funnel/modeling_tf_funnel.py

@@ -16,8 +16,9 @@
 import warnings
 from dataclasses import dataclass
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional, Tuple, Union
 
+import numpy as np
 import tensorflow as tf
 
 from ...activations_tf import get_tf_activation
@@ -39,6 +40,7 @@ from ...modeling_tf_outputs import (
 )
 from ...modeling_tf_utils import (
     TFMaskedLanguageModelingLoss,
+    TFModelInputType,
     TFMultipleChoiceLoss,
     TFPreTrainedModel,
     TFQuestionAnsweringLoss,
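On the TF side the hints are wider: `input_ids` becomes `Optional[TFModelInputType]` (hence the new `TFModelInputType` import) and the other tensor arguments become `Optional[Union[np.ndarray, tf.Tensor]]`, since Keras-style `call` methods accept either. Both spellings denote the same input (a sketch):

import numpy as np
import tensorflow as tf

ids_np = np.array([[1, 2, 3]], dtype=np.int32)
ids_tf = tf.constant([[1, 2, 3]], dtype=tf.int32)
# Either form is converted to the same tensor inside the model:
assert bool(tf.reduce_all(tf.convert_to_tensor(ids_np) == ids_tf))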
@@ -1093,7 +1095,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelBaseModel(TFFunnelPreTrainedModel):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelBaseLayer(config, name="funnel")
@@ -1107,16 +1109,16 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
     @unpack_inputs
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
         return self.funnel(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -1141,7 +1143,7 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelModel(TFFunnelPreTrainedModel):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelMainLayer(config, name="funnel")
@@ -1155,16 +1157,16 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
         return self.funnel(
             input_ids=input_ids,
@@ -1192,7 +1194,7 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
-    def __init__(self, config, **kwargs):
+    def __init__(self, config: FunnelConfig, **kwargs) -> None:
         super().__init__(config, **kwargs)
 
         self.funnel = TFFunnelMainLayer(config, name="funnel")
@@ -1203,16 +1205,16 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
     @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFFunnelForPreTrainingOutput]:
         r"""
         Returns:
@@ -1259,16 +1261,16 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
 @add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
 class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
 
         self.funnel = TFFunnelMainLayer(config, name="funnel")
         self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head")
 
-    def get_lm_head(self):
+    def get_lm_head(self) -> TFFunnelMaskedLMHead:
         return self.lm_head
 
-    def get_prefix_bias_name(self):
+    def get_prefix_bias_name(self) -> str:
         warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
         return self.name + "/" + self.lm_head.name
@@ -1282,17 +1284,17 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1341,7 +1343,7 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClassificationLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1358,17 +1360,17 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClassificationLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1418,7 +1420,7 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClassificationLoss):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
 
         self.funnel = TFFunnelBaseLayer(config, name="funnel")
@@ -1444,17 +1446,17 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1514,7 +1516,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
             }
         ]
     )
-    def serving(self, inputs: Dict[str, tf.Tensor]):
+    def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
         output = self.call(input_ids=inputs)
 
         return self.serving_output(output=output)
@@ -1535,7 +1537,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificationLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1555,17 +1557,17 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificationLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1614,7 +1616,7 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificationLoss):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1633,18 +1635,18 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        start_positions=None,
-        end_positions=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]:
         r"""
         start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.