chenpangpang / transformers · Commits

Unverified commit 4060d685, authored Apr 20, 2023 by Joao Gante, committed via GitHub on Apr 20, 2023 (parent: 474bf508).

XGLM: Fix left-padding (PT and TF) (#22828)

Showing 5 changed files with 165 additions and 263 deletions (+165, -263).
Files changed:

  src/transformers/models/xglm/modeling_flax_xglm.py   +0   -12
  src/transformers/models/xglm/modeling_tf_xglm.py     +25  -18
  src/transformers/models/xglm/modeling_xglm.py        +65  -133
  tests/models/xglm/test_modeling_tf_xglm.py           +36  -64
  tests/models/xglm/test_modeling_xglm.py              +39  -36
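The change teaches both the PyTorch and TensorFlow XGLM implementations to accept explicit `position_ids` and to derive them from the attention mask during generation, so that left-padding no longer shifts the positions of the real tokens. A minimal sketch of that core idea (toy tensors, not code from the commit; the same recipe appears in the PyTorch `prepare_inputs_for_generation` hunk further down):

import torch

# A left-padded row and a full-length row.
attention_mask = torch.tensor([[0, 0, 1, 1, 1],
                               [1, 1, 1, 1, 1]])

# Positions counted only over real tokens; padding slots get a dummy value (they are masked anyway).
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)

print(position_ids)
# tensor([[1, 1, 0, 1, 2],
#         [0, 1, 2, 3, 4]])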
src/transformers/models/xglm/modeling_flax_xglm.py (+0, -12)

@@ -124,18 +124,6 @@ def create_sinusoidal_positions(n_pos, dim, padding_idx=1):
     return jnp.array(emb)
 
 
-def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
-    """
-    Shift input ids one token to the right.
-    """
-    shifted_input_ids = jnp.roll(input_ids, 1, axis=-1)
-    shifted_input_ids = shifted_input_ids.at[(..., 0)].set(decoder_start_token_id)
-    # replace possible -100 values in labels by `pad_token_id`
-    shifted_input_ids = jnp.where(shifted_input_ids == -100, pad_token_id, shifted_input_ids)
-
-    return shifted_input_ids
-
-
 class FlaxXGLMAttention(nn.Module):
     config: XGLMConfig
     embed_dim: int
src/transformers/models/xglm/modeling_tf_xglm.py (+25, -18)

@@ -476,19 +476,8 @@ class TFXGLMMainLayer(tf.keras.layers.Layer):
 
         return combined_attention_mask
 
-    def embed_positions(
-        self,
-        input_ids: Optional[TFModelInputType] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        past_key_values_length: Optional[int] = None,
-    ) -> tf.Tensor:
-        if input_ids is not None:
-            position_ids = _create_position_ids_from_input_ids(input_ids, past_key_values_length, self.padding_idx)
-        else:
-            position_ids = _create_position_ids_from_inputs_embeds(
-                inputs_embeds, past_key_values_length, self.padding_idx
-            )
-
+    def embed_positions(self, position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None) -> tf.Tensor:
+        position_ids += self.offset
         positions = tf.gather(self._embed_positions_weights, position_ids, axis=0)
         return positions
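A toy sketch (assumed shapes, not the actual layer) of what the rewritten `embed_positions` now does: the caller passes explicit position ids and the layer shifts them by its constant offset (2 for XGLM, which reserves the first rows of the table) before gathering rows of the sinusoidal weights.

import tensorflow as tf

offset = 2                                  # XGLM reserves the first two table rows
table = tf.random.uniform((16, 8))          # stand-in for self._embed_positions_weights
position_ids = tf.constant([[0, 1, 2, 3]])  # positions already computed upstream from the attention mask

positions = tf.gather(table, position_ids + offset, axis=0)
print(positions.shape)  # (1, 4, 8)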
@@ -497,6 +486,7 @@ class TFXGLMMainLayer(tf.keras.layers.Layer):
         self,
         input_ids: Optional[TFModelInputType] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
         encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -528,9 +518,14 @@ class TFXGLMMainLayer(tf.keras.layers.Layer):
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         # past_key_values_length
         past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
 
+        if position_ids is None:
+            position_ids = tf.expand_dims(
+                tf.range(past_key_values_length, input_shape[-1] + past_key_values_length), axis=0
+            )
+            position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])
+
         if inputs_embeds is None:
             # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
             # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
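For reference, a small standalone check (toy values, simplified from the branch above, not from the commit) of what the default path produces when no `position_ids` are passed: a single row of consecutive positions starting at `past_key_values_length`, broadcast over the batch, which is only correct when there is no left padding.

import tensorflow as tf

past_key_values_length = 0
seq_len = 5

# Default positions: consecutive indices, padding not accounted for.
position_ids = tf.expand_dims(tf.range(past_key_values_length, seq_len + past_key_values_length), axis=0)
print(position_ids.numpy())  # [[0 1 2 3 4]]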
@@ -552,7 +547,7 @@ class TFXGLMMainLayer(tf.keras.layers.Layer):
             encoder_attention_mask = _expand_mask(encoder_attention_mask, tgt_len=input_shape[-1])
 
         # embed positions
-        positions = self.embed_positions(input_ids, inputs_embeds, past_key_values_length)
+        positions = self.embed_positions(position_ids)
 
         hidden_states = tf.cast(inputs_embeds, dtype=tf.float32) + positions
@@ -713,6 +708,11 @@ XGLM_INPUTS_DOCSTRING = r"""
             - 0 for tokens that are **masked**.
 
             [What are attention masks?](../glossary#attention-mask)
+        position_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
         encoder_hidden_states (`tf.Tensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention of
             the decoder.
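A hedged usage sketch of the newly documented argument: calling the TF model with explicit, left-padding-aware position ids derived exactly as `prepare_inputs_for_generation` does below. This assumes the `facebook/xglm-564M` checkpoint is downloadable and sentencepiece is installed.

import tensorflow as tf
from transformers import TFXGLMModel, XGLMTokenizer

tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
model = TFXGLMModel.from_pretrained("facebook/xglm-564M")

tokenizer.padding_side = "left"
inputs = tokenizer(["Hello, my dog is a little", "Today, I"], return_tensors="tf", padding=True)

# Left-padding-aware positions: exclusive cumulative sum over the attention mask.
position_ids = tf.math.cumsum(inputs["attention_mask"], axis=-1, exclusive=True)

outputs = model(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    position_ids=position_ids,
)
print(outputs.last_hidden_state.shape)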
@@ -796,6 +796,7 @@ class TFXGLMModel(TFXGLMPreTrainedModel):
         self,
         input_ids: Optional[TFModelInputType] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
         encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -876,9 +877,6 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
             name="lm_head",
         )
 
-        # TODO (Joao): investigate why XGLM has numerical issues in XLA generate
-        self.supports_xla_generation = False
-
     def get_output_embeddings(self):
         return self.lm_head
@@ -890,11 +888,18 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
...
@@ -890,11 +888,18 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
if
past_key_values
:
if
past_key_values
:
inputs
=
tf
.
expand_dims
(
inputs
[:,
-
1
],
-
1
)
inputs
=
tf
.
expand_dims
(
inputs
[:,
-
1
],
-
1
)
position_ids
=
kwargs
.
get
(
"position_ids"
,
None
)
attention_mask
=
kwargs
.
get
(
"attention_mask"
,
None
)
attention_mask
=
kwargs
.
get
(
"attention_mask"
,
None
)
if
attention_mask
is
not
None
and
position_ids
is
None
:
position_ids
=
tf
.
math
.
cumsum
(
attention_mask
,
axis
=-
1
,
exclusive
=
True
)
if
past_key_values
:
position_ids
=
tf
.
expand_dims
(
position_ids
[:,
-
1
],
-
1
)
return
{
return
{
"input_ids"
:
inputs
,
"input_ids"
:
inputs
,
"attention_mask"
:
attention_mask
,
"attention_mask"
:
attention_mask
,
"position_ids"
:
position_ids
,
"past_key_values"
:
past_key_values
,
"past_key_values"
:
past_key_values
,
"use_cache"
:
use_cache
,
"use_cache"
:
use_cache
,
}
}
...
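A quick check (not from the commit) of the TF recipe above: an exclusive cumulative sum over a left-padded attention mask yields positions that start at 0 on the first real token; the values on padding slots are irrelevant because those tokens are masked out.

import tensorflow as tf

attention_mask = tf.constant([[0, 0, 1, 1, 1],
                              [1, 1, 1, 1, 1]])
position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True)
print(position_ids.numpy())
# [[0 0 0 1 2]
#  [0 1 2 3 4]]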
@@ -911,6 +916,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
...
@@ -911,6 +916,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
self
,
self
,
input_ids
:
Optional
[
TFModelInputType
]
=
None
,
input_ids
:
Optional
[
TFModelInputType
]
=
None
,
attention_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
attention_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
position_ids
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
encoder_hidden_states
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
encoder_hidden_states
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
encoder_attention_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
encoder_attention_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
head_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
head_mask
:
Optional
[
Union
[
np
.
ndarray
,
tf
.
Tensor
]]
=
None
,
...
@@ -935,6 +941,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
         outputs = self.model(
             input_ids=input_ids,
             attention_mask=attention_mask,
+            position_ids=position_ids,
             encoder_hidden_states=encoder_hidden_states,
             encoder_attention_mask=encoder_attention_mask,
             head_mask=head_mask,
src/transformers/models/xglm/modeling_xglm.py (+65, -133)

@@ -75,11 +75,34 @@ XGLM_INPUTS_DOCSTRING = r"""
             - 0 for tokens that are **masked**.
 
             [What are attention masks?](../glossary#attention-mask)
-        head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):
-            Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`:
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
+        encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
+            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention of
+            the decoder.
+        encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*):
+            Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values
+            selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            [What are attention masks?](../glossary#attention-mask)
+        head_mask (`torch.Tensor` of shape `(num_layers, attention_heads)`, *optional*):
+            Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
 
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
+        cross_attn_head_mask (`torch.Tensor` of shape `(num_layers, attention_heads)`, *optional*):
+            Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`:
+
+            - 1 indicates the head is **not masked**,
+            - 0 indicates the head is **masked**.
+
         past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
             Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
             `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape
@@ -88,20 +111,12 @@ XGLM_INPUTS_DOCSTRING = r"""
             Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
             blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
 
-            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
-            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
-            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
-            can choose to directly pass an embedded representation. This is useful if you want more control over how to
-            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
-        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
-            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. If
-            `past_key_values` is used, optionally only the last `inputs_embeds` have to be input (see
-            `past_key_values`). This is useful if you want more control over how to convert `input_ids` indices into
-            associated vectors than the model's internal embedding lookup matrix.
-        use_cache (`bool`, *optional*):
-            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
-            `past_key_values`).
+            If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
+            have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
+            of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape `(batch_size,
+            sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to
+            directly pass an embedded representation. This is useful if you want more control over how to convert
+            `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         output_attentions (`bool`, *optional*):
             Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
             tensors for more detail.
@@ -146,18 +161,6 @@ def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int]
     return inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(dtype).min)
 
 
-def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
-    """
-    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
-    are ignored. This is modified from fairseq's `utils.make_positions`.
-    """
-    # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
-    mask = input_ids.ne(padding_idx).int()
-    incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
-    return incremental_indices.long() + padding_idx
-
-
-# Copied from transformers.models.m2m_100.modeling_m2m_100.M2M100SinusoidalPositionalEmbedding with M2M100->XGLM
 class XGLMSinusoidalPositionalEmbedding(nn.Module):
     """This module produces sinusoidal positional embeddings of any length."""
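The helper deleted above implemented fairseq's `make_positions` recipe: padding tokens keep `padding_idx` as their position and real tokens count up from `padding_idx + 1`. A standalone restatement of that formula on a left-padded row (for illustration only, since the function no longer exists; the model now receives explicit `position_ids` instead):

import torch

padding_idx = 1
input_ids = torch.tensor([[padding_idx, padding_idx, 5, 6, 7]])  # left-padded row

mask = input_ids.ne(padding_idx).int()
incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + 0) * mask  # past_key_values_length = 0
position_ids = incremental_indices.long() + padding_idx
print(position_ids)  # tensor([[1, 1, 2, 3, 4]]) -- pads stay at padding_idx, real tokens start at padding_idx + 1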
@@ -198,43 +201,17 @@ class XGLMSinusoidalPositionalEmbedding(nn.Module):
         return emb.to(torch.get_default_dtype())
 
     @torch.no_grad()
-    def forward(
-        self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0
-    ):
-        if input_ids is not None:
-            bsz, seq_len = input_ids.size()
-            # Create the position ids from the input token ids. Any padded tokens remain padded.
-            position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length).to(
-                input_ids.device
-            )
-        else:
-            bsz, seq_len = inputs_embeds.size()[:-1]
-            position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds, past_key_values_length)
+    def forward(self, position_ids: torch.Tensor = None, past_key_values_length: int = 0):
+        bsz, seq_len = position_ids.size()
+        position_ids += self.offset
 
-        # expand embeddings if needed
-        max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
+        # Expand embeddings if needed. `position_ids.max()` is NOT used to keep torch.fx compatibility.
+        max_pos = 2 + seq_len + past_key_values_length
         if max_pos > self.weights.size(0):
-            self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx)
+            self.make_weights(max_pos, self.embedding_dim, self.padding_idx)
 
         return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, self.weights.shape[-1]).detach()
 
-    def create_position_ids_from_inputs_embeds(self, inputs_embeds, past_key_values_length):
-        """
-        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.
-
-        Args:
-            inputs_embeds: torch.Tensor
-
-        Returns: torch.Tensor
-        """
-        input_shape = inputs_embeds.size()[:-1]
-        sequence_length = input_shape[1]
-
-        position_ids = torch.arange(
-            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
-        )
-        return position_ids.unsqueeze(0).expand(input_shape).contiguous() + past_key_values_length
-
 
 class XGLMAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
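An illustrative sketch (toy tensors, not the module itself) of the new lookup path in the positional embedding: the sinusoidal table is indexed directly with caller-provided position ids shifted by the XGLM offset of 2, instead of re-deriving positions from `input_ids` inside the module.

import torch

bsz, seq_len, dim, offset = 1, 4, 8, 2
weights = torch.randn(16, dim)               # stand-in for the sinusoidal table
position_ids = torch.tensor([[0, 1, 2, 3]])  # computed upstream (e.g. from the attention mask)

shifted = position_ids + offset
positions = weights.index_select(0, shifted.view(-1)).view(bsz, seq_len, dim)
print(positions.shape)  # torch.Size([1, 4, 8])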
@@ -605,6 +582,7 @@ class XGLMModel(XGLMPreTrainedModel):
         self,
         input_ids: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         head_mask: Optional[torch.Tensor] = None,
@@ -616,70 +594,6 @@ class XGLMModel(XGLMPreTrainedModel):
...
@@ -616,70 +594,6 @@ class XGLMModel(XGLMPreTrainedModel):
output_hidden_states
:
Optional
[
bool
]
=
None
,
output_hidden_states
:
Optional
[
bool
]
=
None
,
return_dict
:
Optional
[
bool
]
=
None
,
return_dict
:
Optional
[
bool
]
=
None
,
)
->
Union
[
Tuple
[
torch
.
Tensor
],
BaseModelOutputWithPastAndCrossAttentions
]:
)
->
Union
[
Tuple
[
torch
.
Tensor
],
BaseModelOutputWithPastAndCrossAttentions
]:
r
"""
Args:
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
provide it.
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
[`PreTrainedTokenizer.__call__`] for details.
[What are input IDs?](../glossary#input-ids)
attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
of the decoder.
encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*):
Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values
selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
head_mask (`torch.Tensor` of shape `(num_layers, attention_heads)`, *optional*):
Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
- 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**.
cross_attn_head_mask (`torch.Tensor` of shape `(num_layers, attention_heads)`, *optional*):
Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`:
- 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**.
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_attentions
=
output_attentions
if
output_attentions
is
not
None
else
self
.
config
.
output_attentions
output_attentions
=
output_attentions
if
output_attentions
is
not
None
else
self
.
config
.
output_attentions
output_hidden_states
=
(
output_hidden_states
=
(
output_hidden_states
if
output_hidden_states
is
not
None
else
self
.
config
.
output_hidden_states
output_hidden_states
if
output_hidden_states
is
not
None
else
self
.
config
.
output_hidden_states
...
@@ -698,9 +612,19 @@ class XGLMModel(XGLMPreTrainedModel):
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         # past_key_values_length
         past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
 
+        if position_ids is None:
+            position_ids = torch.arange(
+                past_key_values_length,
+                input_shape[-1] + past_key_values_length,
+                dtype=torch.long,
+                device=input_ids.device if input_ids is not None else inputs_embeds.device,
+            )
+            position_ids = position_ids.unsqueeze(0).view(-1, input_shape[-1])
+        else:
+            position_ids = position_ids.view(-1, input_shape[-1])
+
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
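A quick comparison (toy values, not from the commit) of the default `torch.arange` positions created above with the mask-aware positions that generation now supplies; for a left-padded batch only the latter places the first real token at position 0.

import torch

attention_mask = torch.tensor([[0, 0, 1, 1, 1]])
seq_len = attention_mask.shape[-1]

default_position_ids = torch.arange(0, seq_len).unsqueeze(0)       # fallback when no position_ids are passed
mask_position_ids = attention_mask.long().cumsum(-1) - 1           # what generation passes in
mask_position_ids = mask_position_ids.masked_fill(attention_mask == 0, 1)

print(default_position_ids)  # tensor([[0, 1, 2, 3, 4]])
print(mask_position_ids)     # tensor([[1, 1, 0, 1, 2]])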
@@ -713,11 +637,7 @@ class XGLMModel(XGLMPreTrainedModel):
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
             encoder_attention_mask = _expand_mask(encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1])
 
-        # embed positions
-        positions = self.embed_positions(input_ids, inputs_embeds, past_key_values_length)
-
-        hidden_states = inputs_embeds + positions
+        hidden_states = inputs_embeds + self.embed_positions(position_ids, past_key_values_length)
 
         hidden_states = nn.functional.dropout(hidden_states, p=float(self.dropout), training=self.training)
 
         if self.gradient_checkpointing and self.training:
@@ -866,6 +786,7 @@ class XGLMForCausalLM(XGLMPreTrainedModel):
         self,
         input_ids: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         head_mask: Optional[torch.Tensor] = None,
@@ -895,6 +816,7 @@ class XGLMForCausalLM(XGLMPreTrainedModel):
...
@@ -895,6 +816,7 @@ class XGLMForCausalLM(XGLMPreTrainedModel):
outputs
=
self
.
model
(
outputs
=
self
.
model
(
input_ids
=
input_ids
,
input_ids
=
input_ids
,
attention_mask
=
attention_mask
,
attention_mask
=
attention_mask
,
position_ids
=
position_ids
,
encoder_hidden_states
=
encoder_hidden_states
,
encoder_hidden_states
=
encoder_hidden_states
,
encoder_attention_mask
=
encoder_attention_mask
,
encoder_attention_mask
=
encoder_attention_mask
,
head_mask
=
head_mask
,
head_mask
=
head_mask
,
...
@@ -935,6 +857,15 @@ class XGLMForCausalLM(XGLMPreTrainedModel):
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, **kwargs
     ):
+        position_ids = kwargs.get("position_ids", None)
+        if attention_mask is not None and position_ids is None:
+            # create position_ids on the fly for batch generation
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            if past_key_values:
+                position_ids = position_ids[:, -1].unsqueeze(-1)
+        else:
+            position_ids = None
+
         # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly
         if attention_mask is None:
             attention_mask = input_ids.new_ones(input_ids.shape)

@@ -945,6 +876,7 @@ class XGLMForCausalLM(XGLMPreTrainedModel):
         return {
             "input_ids": input_ids,  # encoder_outputs is defined. input_ids not needed
             "attention_mask": attention_mask,
+            "position_ids": position_ids,
             "past_key_values": past_key_values,
             "use_cache": use_cache,
         }
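A hedged usage sketch of the behaviour the updated PyTorch tests exercise: with left padding and an attention mask, batched generation now matches unbatched generation. This assumes the `facebook/xglm-564M` checkpoint can be downloaded.

from transformers import XGLMForCausalLM, XGLMTokenizer

tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
tokenizer.padding_side = "left"

sentences = ["Hello, my dog is a little", "Today, I"]
inputs = tokenizer(sentences, return_tensors="pt", padding=True)

# position_ids are built internally by prepare_inputs_for_generation from the attention mask.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=12,
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))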
tests/models/xglm/test_modeling_tf_xglm.py (+36, -64)

@@ -175,44 +175,6 @@ class TFXGLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
             name = model.get_bias()
             assert name is None
 
-    @slow
-    def test_batch_generation(self):
-        model = TFXGLMForCausalLM.from_pretrained("facebook/xglm-564M")
-        tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
-
-        tokenizer.padding_side = "left"
-
-        # use different length sentences to test batching
-        sentences = [
-            "Hello, my dog is a little",
-            "Today, I",
-        ]
-
-        inputs = tokenizer(sentences, return_tensors="tf", padding=True)
-
-        outputs = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
-
-        inputs_non_padded = tokenizer(sentences[0], return_tensors="tf").input_ids
-        output_non_padded = model.generate(input_ids=inputs_non_padded)
-
-        num_paddings = (
-            inputs_non_padded.shape[-1]
-            - tf.math.reduce_sum(tf.cast(inputs["attention_mask"][-1], dtype=tf.int64)).numpy()
-        )
-        inputs_padded = tokenizer(sentences[1], return_tensors="tf").input_ids
-        output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)
-
-        batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
-        padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
-
-        expected_output_sentence = [
-            "Hello, my dog is a little bit of a shy one, but he is very friendly",
-            "Today, I am going to share with you a few of my favorite things",
-        ]
-        self.assertListEqual(expected_output_sentence, batch_out_sentence)
-        self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in TF_XGLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -246,6 +208,8 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
         tf.random.set_seed(0)
         tokenized = tokenizer("Today is a nice day and", return_tensors="tf")
         input_ids = tokenized.input_ids
-        output_ids = model.generate(input_ids, do_sample=True, seed=[7, 0])
+        # forces the generation to happen on CPU, to avoid GPU-related quirks (and assure same output regardless of the available devices)
+        with tf.device(":/CPU:0"):
+            output_ids = model.generate(input_ids, do_sample=True, seed=[7, 0])
         output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)
@@ -255,33 +219,41 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
...
@@ -255,33 +219,41 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
self
.
assertEqual
(
output_str
,
EXPECTED_OUTPUT_STR
)
self
.
assertEqual
(
output_str
,
EXPECTED_OUTPUT_STR
)
@
slow
@
slow
def
test_lm_generate_xglm_left_padding
(
self
):
def
test_batch_generation
(
self
):
"""Tests that the generated text is the same, regarless of left padding"""
tokenizer
=
XGLMTokenizer
.
from_pretrained
(
"facebook/xglm-564M"
)
model
=
TFXGLMForCausalLM
.
from_pretrained
(
"facebook/xglm-564M"
)
model
=
TFXGLMForCausalLM
.
from_pretrained
(
"facebook/xglm-564M"
)
tokenizer
=
XGLMTokenizer
.
from_pretrained
(
"facebook/xglm-564M"
)
tokenizer
.
padding_side
=
"left"
tokenizer
.
padding_side
=
"left"
generation_kwargs
=
{
# use different length sentences to test batching
"bad_words_ids"
:
[
tokenizer
(
"is"
).
input_ids
,
tokenizer
(
"angry about"
).
input_ids
],
sentences
=
[
"no_repeat_ngram_size"
:
2
,
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
"do_sample"
:
False
,
"left-padding, such as in batched generation. The output for the sequence below should be the same "
"repetition_penalty"
:
1.3
,
"regardless of whether left padding is applied or not. When"
,
}
"Hello, my dog is a little"
,
expected_output_string
=
(
]
"Today is a beautiful day and I am so glad that we have the opportunity to spend time with"
)
inputs
=
tokenizer
(
sentences
,
return_tensors
=
"tf"
,
padding
=
True
)
input_ids
=
inputs
[
"input_ids"
]
outputs
=
model
.
generate
(
input_ids
=
input_ids
,
attention_mask
=
inputs
[
"attention_mask"
],
max_new_tokens
=
12
)
sentences
=
[
"Today is a beautiful day and"
]
inputs_non_padded
=
tokenizer
(
sentences
[
0
],
return_tensors
=
"tf"
).
input_ids
input_ids
=
tokenizer
(
sentences
,
return_tensors
=
"tf"
,
padding
=
True
)
output_non_padded
=
model
.
generate
(
input_ids
=
inputs_non_padded
,
max_new_tokens
=
12
)
# using default length
output_ids
=
model
.
generate
(
**
input_ids
,
**
generation_kwargs
)
inputs_padded
=
tokenizer
(
sentences
[
1
],
return_tensors
=
"tf"
).
input_ids
output_strings
=
tokenizer
.
batch_decode
(
output_ids
,
skip_special_tokens
=
True
)
output_padded
=
model
.
generate
(
input_ids
=
inputs_padded
,
max_new_tokens
=
12
)
self
.
assertEqual
(
output_strings
[
0
],
expected_output_string
)
batch_out_sentence
=
tokenizer
.
batch_decode
(
outputs
,
skip_special_tokens
=
True
)
sentences
=
[
"Today is a beautiful day and"
,
"This is a very long input that we absolutely don't care about"
]
non_padded_sentence
=
tokenizer
.
decode
(
output_non_padded
[
0
],
skip_special_tokens
=
True
)
input_ids
=
tokenizer
(
sentences
,
return_tensors
=
"tf"
,
padding
=
True
)
padded_sentence
=
tokenizer
.
decode
(
output_padded
[
0
],
skip_special_tokens
=
True
)
# longer max length to capture the full length (remember: it is left padded)
output_ids
=
model
.
generate
(
**
input_ids
,
**
generation_kwargs
,
max_length
=
28
)
expected_output_sentence
=
[
output_strings
=
tokenizer
.
batch_decode
(
output_ids
,
skip_special_tokens
=
True
)
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
self
.
assertEqual
(
output_strings
[
0
],
expected_output_string
)
"left-padding, such as in batched generation. The output for the sequence below should be the same "
"regardless of whether left padding is applied or not. When left padding is applied, the sequence will be "
"a single"
,
"Hello, my dog is a little bit of a shy one, but he is very friendly"
,
]
self
.
assertListEqual
(
expected_output_sentence
,
batch_out_sentence
)
self
.
assertListEqual
(
expected_output_sentence
,
[
non_padded_sentence
,
padded_sentence
])
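A hedged sketch of how one might run the updated slow test locally (assumptions: pytest is installed, this is executed from a checkout of the transformers repository, and the model download is possible; `RUN_SLOW=1` enables tests decorated with `@slow`):

import os
import pytest

os.environ["RUN_SLOW"] = "1"  # must be set before the test module is collected
pytest.main(["-k", "test_batch_generation", "tests/models/xglm/test_modeling_tf_xglm.py"])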
tests/models/xglm/test_modeling_xglm.py (+39, -36)

@@ -340,6 +340,35 @@ class XGLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xglm_weight_initialization(*config_and_inputs)
 
+    @slow
+    def test_model_from_pretrained(self):
+        for model_name in XGLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
+            model = XGLMModel.from_pretrained(model_name)
+            self.assertIsNotNone(model)
+
+
+@require_torch
+class XGLMModelLanguageGenerationTest(unittest.TestCase):
+    def _test_lm_generate_xglm_helper(
+        self,
+        gradient_checkpointing=False,
+        verify_outputs=True,
+    ):
+        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
+        if gradient_checkpointing:
+            model.gradient_checkpointing_enable()
+        else:
+            model.gradient_checkpointing_disable()
+        model.to(torch_device)
+        input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device)  # The dog
+        # </s> The dog is a very friendly dog. He is very affectionate and loves to play with other
+        # fmt: off
+        expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581]
+        # fmt: on
+        output_ids = model.generate(input_ids, do_sample=False, num_beams=1)
+        if verify_outputs:
+            self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
+
     @slow
     def test_batch_generation(self):
         model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
@@ -350,65 +379,39 @@ class XGLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
 
         # use different length sentences to test batching
         sentences = [
+            "This is an extremelly long sentence that only exists to test the ability of the model to cope with "
+            "left-padding, such as in batched generation. The output for the sequence below should be the same "
+            "regardless of whether left padding is applied or not. When",
             "Hello, my dog is a little",
-            "Today, I",
         ]
 
         inputs = tokenizer(sentences, return_tensors="pt", padding=True)
         input_ids = inputs["input_ids"].to(torch_device)
 
         outputs = model.generate(
             input_ids=input_ids,
-            attention_mask=inputs["attention_mask"].to(torch_device),
+            attention_mask=inputs["attention_mask"].to(torch_device),
+            max_new_tokens=12,
         )
 
         inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
-        output_non_padded = model.generate(input_ids=inputs_non_padded)
+        output_non_padded = model.generate(input_ids=inputs_non_padded, max_new_tokens=12)
 
-        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
         inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
-        output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)
+        output_padded = model.generate(input_ids=inputs_padded, max_new_tokens=12)
 
         batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
         non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
         padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
 
         expected_output_sentence = [
+            "This is an extremelly long sentence that only exists to test the ability of the model to cope with "
+            "left-padding, such as in batched generation. The output for the sequence below should be the same "
+            "regardless of whether left padding is applied or not. When left padding is applied, the sequence will be "
+            "a single",
             "Hello, my dog is a little bit of a shy one, but he is very friendly",
-            "Today, I am going to share with you a few of my favorite things",
         ]
         self.assertListEqual(expected_output_sentence, batch_out_sentence)
         self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
 
-    @slow
-    def test_model_from_pretrained(self):
-        for model_name in XGLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-            model = XGLMModel.from_pretrained(model_name)
-            self.assertIsNotNone(model)
-
-
-@require_torch
-class XGLMModelLanguageGenerationTest(unittest.TestCase):
-    def _test_lm_generate_xglm_helper(
-        self,
-        gradient_checkpointing=False,
-        verify_outputs=True,
-    ):
-        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
-        if gradient_checkpointing:
-            model.gradient_checkpointing_enable()
-        else:
-            model.gradient_checkpointing_disable()
-        model.to(torch_device)
-        input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device)  # The dog
-        # </s> The dog is a very friendly dog. He is very affectionate and loves to play with other
-        # fmt: off
-        expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581]
-        # fmt: on
-        output_ids = model.generate(input_ids, do_sample=False, num_beams=1)
-        if verify_outputs:
-            self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
-
     @slow
     def test_lm_generate_xglm(self):
         self._test_lm_generate_xglm_helper()