Commit 5e8c8eb5 (unverified) in chenpangpang/transformers
Authored Feb 22, 2023 by Aaron Gokaslan, committed by GitHub on Feb 22, 2023

Apply ruff flake8-comprehensions (#21694)

parent df06fb1f
Changes: 230 files changed in this commit. Showing 20 changed files with 44 additions and 44 deletions (+44, -44).
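All of the edits in this commit are mechanical rewrites suggested by ruff's flake8-comprehensions (C4xx) rules. As a quick orientation, here is a small, self-contained sketch of the rewrite shapes that recur in the diffs below; the variable names are placeholders, not code from the repository.

# Illustrative sketch of the flake8-comprehensions rewrites applied in this commit.
words = ["a b", "c"]

# C416: unnecessary comprehension -> list()/set()/dict() call
tokens = list(words)                          # instead of [w for w in words]

# C413: unnecessary list() around sorted() (sorted already returns a list)
ordered = sorted(words)                       # instead of list(sorted(words))

# C417: map(lambda ...) -> comprehension or generator expression
lengths = [len(w) for w in words]             # instead of list(map(lambda w: len(w), words))

# C401/C405: set() of a generator or list literal -> set comprehension / set literal
langs = {"zh", "th", "ja"}                    # instead of set(["zh", "th", "ja"])

# C402/C404/C408: dict() calls -> dict comprehensions / dict literals
index = {w: i for i, w in enumerate(words)}   # instead of dict((w, i) for i, w in enumerate(words))
metrics = {"count": len(words)}               # instead of dict(count=len(words))

The 20 files shown on this page, with per-file addition/deletion counts: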
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py   +1 -1
src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py   +2 -2
src/transformers/models/codegen/modeling_codegen.py   +1 -1
src/transformers/models/conditional_detr/image_processing_conditional_detr.py   +1 -1
src/transformers/models/convnext/modeling_convnext.py   +1 -1
src/transformers/models/ctrl/tokenization_ctrl.py   +1 -1
src/transformers/models/data2vec/modeling_tf_data2vec_vision.py   +1 -1
src/transformers/models/deformable_detr/image_processing_deformable_detr.py   +1 -1
src/transformers/models/detr/image_processing_detr.py   +1 -1
src/transformers/models/dinat/modeling_dinat.py   +1 -1
src/transformers/models/donut/processing_donut.py   +1 -1
src/transformers/models/ernie_m/tokenization_ernie_m.py   +3 -3
src/transformers/models/esm/modeling_esmfold.py   +2 -2
src/transformers/models/esm/openfold_utils/chunk_utils.py   +1 -1
src/transformers/models/flaubert/tokenization_flaubert.py   +4 -4
src/transformers/models/fsmt/tokenization_fsmt.py   +4 -4
src/transformers/models/gptj/modeling_gptj.py   +1 -1
src/transformers/models/herbert/tokenization_herbert.py   +4 -4
src/transformers/models/jukebox/modeling_jukebox.py   +12 -12
src/transformers/models/jukebox/tokenization_jukebox.py   +1 -1
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py

@@ -191,7 +191,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         words = re.findall(r"\S+\n?", text)
         for token in words:
-            split_tokens.extend([t for t in self.bpe(token).split(" ")])
+            split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens

     def _convert_token_to_id(self, token: str) -> int:
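This is the C416 pattern: `str.split(" ")` already returns a list, so the comprehension only copied it. A minimal sketch of the change, where `bpe` is a stand-in for the real BPE method, not transformers code:

def bpe(token: str) -> str:
    return " ".join(token)  # pretend every character is a separate BPE piece

split_tokens = []
for token in ["hello", "BPE"]:
    # str.split(" ") already returns a list, so list(...) is just a copy of it;
    # the old [t for t in ...] comprehension built the same copy one element at a time.
    split_tokens.extend(list(bpe(token).split(" ")))
print(split_tokens)  # ['h', 'e', 'l', 'l', 'o', 'B', 'P', 'E']

Strictly speaking, `extend()` accepts any iterable, so the remaining `list(...)` wrapper is also redundant; the rule only removes the comprehension.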
src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py

@@ -89,7 +89,7 @@ def convert_bloom_checkpoint_to_pytorch(
     if shard_model:
         file_names = os.listdir(bloom_checkpoint_path)
-        file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+        file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))

         index_dict = {"weight_map": {}, "metadata": {}}
         total_size = 0

@@ -157,7 +157,7 @@ def convert_bloom_checkpoint_to_pytorch(
         model = BloomModel(config)
         file_names = os.listdir(bloom_checkpoint_path)
-        file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+        file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))

         missing_keys = None
         for i, file in enumerate(file_names):
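The bloom converter change is the C413 rule: `sorted()` already returns a new list, so wrapping it in `list()` only made an extra copy. A small self-contained check with made-up file names:

file_names = ["layer_01-model_00.pt", "readme.txt", "layer_00-model_00.pt"]

# sorted() accepts any iterable (including a filter object) and always returns a new
# list, so the old list(...) wrapper around it only made an extra copy.
file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
print(file_names)  # ['layer_00-model_00.pt', 'layer_01-model_00.pt']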
src/transformers/models/codegen/modeling_codegen.py

@@ -85,7 +85,7 @@ def duplicate_interleave(m):
 # Copied from transformers.models.gptj.modeling_gptj.apply_rotary_pos_emb
 def apply_rotary_pos_emb(x, sincos, offset=0):
-    sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+    sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
     # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
     return (x * cos) + (rotate_every_two(x) * sin)
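This (and the identical gptj helper further down) is C417: `map` with a `lambda` becomes a generator expression. Tuple unpacking iterates the right-hand side exactly once either way, so `sin, cos` end up identical. A shape-only sketch under stated assumptions: NumPy stands in for torch, and `duplicate_interleave` here is a simplified stand-in for the real helper.

import numpy as np

def duplicate_interleave(m: np.ndarray) -> np.ndarray:
    # simplified stand-in for the real helper: repeat each column twice
    return np.repeat(m, 2, axis=1)

x = np.zeros((1, 4, 8, 16))                    # (batch, seq, heads, rotary_dim) style shape
sincos = (np.ones((10, 8)), np.ones((10, 8)))  # precomputed sin/cos tables
offset = 0

# Old: sin, cos = map(lambda t: ..., sincos)
# New: a generator expression; unpacking consumes it once, producing the same pair
# without building a throwaway lambda and map object.
sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
print(sin.shape, cos.shape)  # (1, 4, 1, 16) (1, 4, 1, 16)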
src/transformers/models/conditional_detr/image_processing_conditional_detr.py

@@ -604,7 +604,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)


 # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
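The same one-line fix recurs in the deformable_detr and detr copies of `binary_mask_to_rle` below: C416 again. `list(runs)` over a 1-D NumPy array yields the same elements (NumPy integer scalars) the comprehension did, so the RLE output is unchanged. A self-contained check on a toy mask:

import numpy as np

mask = np.array([0, 1, 1, 0, 1], dtype=np.uint8).flatten()
pixels = np.concatenate([[0], mask, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
# list() over a 1-D array produces the same sequence as [x for x in runs]:
# run starting at pixel 2 with length 2, run starting at pixel 5 with length 1.
print(list(runs))  # values 2, 2, 5, 1 (as NumPy integer scalars)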
src/transformers/models/convnext/modeling_convnext.py

@@ -495,7 +495,7 @@ class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
         self.out_feature_channels = out_feature_channels

         # Add layer norms to hidden states of out_features
-        hidden_states_norms = dict()
+        hidden_states_norms = {}
         for stage, num_channels in zip(self.out_features, self.channels):
             hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
         self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
src/transformers/models/ctrl/tokenization_ctrl.py

@@ -208,7 +208,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         words = re.findall(r"\S+\n?", text)
         for token in words:
-            split_tokens.extend([t for t in self.bpe(token).split(" ")])
+            split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens

     def _convert_token_to_id(self, token):
src/transformers/models/data2vec/modeling_tf_data2vec_vision.py

@@ -596,7 +596,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer):
             self.relative_position_bias = None

         # stochastic depth decay rule
-        dpr = [x for x in tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)]
+        dpr = list(tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers))
         self.layer = [
             TFData2VecVisionLayer(
                 config,
src/transformers/models/deformable_detr/image_processing_deformable_detr.py

@@ -602,7 +602,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)


 # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
src/transformers/models/detr/image_processing_detr.py

@@ -590,7 +590,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)


 # TODO - (Amy) make compatible with other frameworks
src/transformers/models/dinat/modeling_dinat.py

@@ -899,7 +899,7 @@ class DinatBackbone(DinatPreTrainedModel, BackboneMixin):
             self.out_feature_channels[stage] = num_features[i]

         # Add layer norms to hidden states of out_features
-        hidden_states_norms = dict()
+        hidden_states_norms = {}
         for stage, num_channels in zip(self.out_features, self.channels):
             hidden_states_norms[stage] = nn.LayerNorm(num_channels)
         self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
src/transformers/models/donut/processing_donut.py

@@ -130,7 +130,7 @@ class DonutProcessor(ProcessorMixin):
         if added_vocab is None:
             added_vocab = self.tokenizer.get_added_vocab()

-        output = dict()
+        output = {}

         while tokens:
             start_token = re.search(r"<s_(.*?)>", tokens, re.IGNORECASE)
src/transformers/models/ernie_m/tokenization_ernie_m.py

@@ -133,8 +133,8 @@ class ErnieMTokenizer(PreTrainedTokenizer):
         if vocab_file is not None:
             self.vocab = self.load_vocab(filepath=vocab_file)
         else:
-            self.vocab = dict((self.sp_model.id_to_piece(id), id) for id in range(self.sp_model.get_piece_size()))
-        self.reverse_vocab = dict((v, k) for k, v in self.vocab.items())
+            self.vocab = {self.sp_model.id_to_piece(id): id for id in range(self.sp_model.get_piece_size())}
+        self.reverse_vocab = {v: k for k, v in self.vocab.items()}

     def get_offset_mapping(self, text):
         if text is None:

@@ -325,7 +325,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
                     "You should not supply a second sequence if the provided sequence of "
                     "ids is already formatted with special tokens for the model."
                 )
-            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+            return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]

         if token_ids_1 is not None:
             return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
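The ernie_m changes combine C404 (a `dict()` call over a generator of pairs becomes a dict comprehension) with C417 (a `map(lambda ...)` becomes a list comprehension). A minimal sketch of the vocab/reverse-vocab construction, with a plain list of pieces standing in for the SentencePiece model:

# Fake stand-in for sp_model.id_to_piece / get_piece_size, not the real tokenizer.
pieces = ["<pad>", "<s>", "hello", "world"]

# C404: dict((k, v) for ...) -> {k: v for ...}; same mapping, without feeding
# intermediate tuples through the dict() constructor.
vocab = {pieces[i]: i for i in range(len(pieces))}
reverse_vocab = {v: k for k, v in vocab.items()}

print(vocab["hello"], reverse_vocab[3])  # 2 world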
src/transformers/models/esm/modeling_esmfold.py

@@ -201,9 +201,9 @@ def collate_dense_tensors(samples: List[torch.Tensor], pad_v: float = 0) -> torc
     """
     if len(samples) == 0:
         return torch.Tensor()
-    if len(set(x.dim() for x in samples)) != 1:
+    if len({x.dim() for x in samples}) != 1:
         raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}")
-    (device,) = tuple(set(x.device for x in samples))  # assumes all on same device
+    (device,) = tuple({x.device for x in samples})  # assumes all on same device
     max_shape = [max(lst) for lst in zip(*[x.shape for x in samples])]
     result = torch.empty(len(samples), *max_shape, dtype=samples[0].dtype, device=device)
     result.fill_(pad_v)
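The esmfold change is C401: a generator fed to `set()` becomes a set comprehension, so the uniqueness checks on tensor dims and devices behave exactly as before. A CPU-only sketch of the two rewritten lines:

import torch

samples = [torch.zeros(2, 3), torch.zeros(4, 3)]

# C401: set(x.dim() for x in samples) -> {x.dim() for x in samples};
# same set, built without passing an intermediate generator to the set() constructor.
if len({x.dim() for x in samples}) != 1:
    raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}")

(device,) = tuple({x.device for x in samples})  # assumes all tensors share a device
print(device)  # cpu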
src/transformers/models/esm/openfold_utils/chunk_utils.py

@@ -83,7 +83,7 @@ def _get_minimal_slice_set(
     # Base cases. Either start/end are empty and we're done, or the final,
     # one-dimensional tensor can be simply sliced
     if len(start) == 0:
-        return [tuple()]
+        return [()]
     elif len(start) == 1:
         return [(slice(start[0], end[0] + 1),)]
src/transformers/models/flaubert/tokenization_flaubert.py

@@ -282,10 +282,10 @@ class FlaubertTokenizer(PreTrainedTokenizer):
         self.sm = sacremoses

         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
+        self.cache_moses_tokenizer = {}
-        self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+        self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
         self.lang2id = lang2id
         self.id2lang = id2lang
         if lang2id is not None and id2lang is not None:

@@ -452,7 +452,7 @@ class FlaubertTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in text:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))

         return split_tokens
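flaubert, fsmt and herbert (below) share the same Moses-cache initialisation, so they pick up identical C408 (`dict()` to `{}`) and C405 (`set([...])` to a set literal) fixes. A tiny sketch of the equivalent literals:

# C408: dict() -> {}  and  C405: set([...]) -> {...}; the literals build the same
# objects while skipping a name lookup and constructor call.
cache_moses_punct_normalizer = {}
cache_moses_tokenizer = {}
lang_with_custom_tokenizer = {"zh", "th", "ja"}

print("th" in lang_with_custom_tokenizer)  # True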
src/transformers/models/fsmt/tokenization_fsmt.py

@@ -226,10 +226,10 @@ class FSMTTokenizer(PreTrainedTokenizer):
         self.do_lower_case = do_lower_case

         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
+        self.cache_moses_tokenizer = {}
-        self.cache_moses_detokenizer = dict()
+        self.cache_moses_detokenizer = {}

         if langs and len(langs) == 2:
             self.src_lang, self.tgt_lang = langs

@@ -379,7 +379,7 @@ class FSMTTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in text:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))

         return split_tokens
src/transformers/models/gptj/modeling_gptj.py

@@ -78,7 +78,7 @@ def duplicate_interleave(m):
 def apply_rotary_pos_emb(x, sincos, offset=0):
-    sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+    sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
     # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
     return (x * cos) + (rotate_every_two(x) * sin)
src/transformers/models/herbert/tokenization_herbert.py

@@ -348,10 +348,10 @@ class HerbertTokenizer(PreTrainedTokenizer):
         self.sm = sacremoses

         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
+        self.cache_moses_tokenizer = {}
-        self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+        self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
         # True for current supported model (v1.2.0), False for XLM-17 & 100
         self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent
         self.lang2id = lang2id

@@ -490,7 +490,7 @@ class HerbertTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in pre_tokens:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))

         return split_tokens
src/transformers/models/jukebox/modeling_jukebox.py

@@ -138,7 +138,7 @@ def get_alignment(music_tokens, labels, prior, config):
     hop_length = int(config.hop_fraction[-level - 1] * prior.n_ctx)
     alignment_head, alignment_layer = config.prior_alignment_head[0], config.prior_alignment_layer[0]
-    attn_layers = set([alignment_layer])
+    attn_layers = {alignment_layer}
     alignment_hops = {}
     indices_hops = {}
     for start in tqdm(get_starts(total_length, n_ctx, hop_length), desc="Computing lyric to music alignment "):

@@ -436,7 +436,7 @@ class JukeboxBottleneckBlock(nn.Module):
         used_curr = (_codebook_elem >= self.threshold).sum()
         usage = torch.sum(usage)
         dk = torch.norm(self.codebook - old_codebook) / np.sqrt(np.prod(old_codebook.shape))
-        return dict(entropy=entropy, used_curr=used_curr, usage=usage, dk=dk)
+        return {"entropy": entropy, "used_curr": used_curr, "usage": usage, "dk": dk}

     def preprocess(self, hidden_states):
         hidden_states = hidden_states.permute(0, 2, 1).contiguous()

@@ -2213,11 +2213,11 @@ class JukeboxPrior(PreTrainedModel):
             loss = self.encoder_loss_fraction * encoder_loss * self.nb_relevant_lyric_tokens / self.total_loss_dims
             loss += next_token_prediction_loss * self.next_token_prediction_loss_dims / self.total_loss_dims

-        metrics = dict(
-            bpd=next_token_prediction_loss.clone().detach(),
-            encoder_loss=encoder_loss.clone().detach(),
-            next_token_prediction_loss=next_token_prediction_loss.clone().detach(),
-        )
+        metrics = {
+            "bpd": next_token_prediction_loss.clone().detach(),
+            "encoder_loss": encoder_loss.clone().detach(),
+            "next_token_prediction_loss": next_token_prediction_loss.clone().detach(),
+        }
         if get_preds:
             metrics["preds"] = preds.clone().detach()
         if get_attn_weights:

@@ -2533,11 +2533,11 @@ class JukeboxModel(JukeboxPreTrainedModel):
         # total length of the signal, might be bit different from the actual generated length
         self.total_length = total_length
         for level in sample_levels:
-            sampling_kwargs = dict(
-                temp=0.99 if level == len(self.priors) - 1 else sampling_temperature,
-                chunk_size=chunk_size,
-                sample_tokens=sample_tokens,
-            )
+            sampling_kwargs = {
+                "temp": 0.99 if level == len(self.priors) - 1 else sampling_temperature,
+                "chunk_size": chunk_size,
+                "sample_tokens": sample_tokens,
+            }

             # Set correct total_length, hop_length, labels and sampling_kwargs for level
             total_token_to_sample = total_length // self.priors[level].raw_to_tokens
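The jukebox changes are mostly C408: `dict(key=value, ...)` becomes a literal with string keys. The two spellings build the same mapping whenever the keys are valid identifiers, as they are here; a quick check:

# C408: keyword-argument dict() calls vs. dict literals; identical result for
# identifier keys, but the literal avoids a constructor call.
a = dict(temp=0.99, chunk_size=32, sample_tokens=None)
b = {"temp": 0.99, "chunk_size": 32, "sample_tokens": None}
print(a == b)  # True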
src/transformers/models/jukebox/tokenization_jukebox.py

@@ -187,7 +187,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
        Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
        """
        # only lyrics are not tokenized, but character based is easily handled
-       return [character for character in lyrics]
+       return list(lyrics)

    def tokenize(self, artist, genre, lyrics, **kwargs):
        """