Unverified commit 5e8c8eb5, authored Feb 22, 2023 by Aaron Gokaslan, committed by GitHub on Feb 22, 2023

Apply ruff flake8-comprehensions (#21694)

parent df06fb1f
Changes: 230 files in this commit; this page shows 20 changed files with 44 additions and 44 deletions (+44 -44).
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py  +1 -1
src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py  +2 -2
src/transformers/models/codegen/modeling_codegen.py  +1 -1
src/transformers/models/conditional_detr/image_processing_conditional_detr.py  +1 -1
src/transformers/models/convnext/modeling_convnext.py  +1 -1
src/transformers/models/ctrl/tokenization_ctrl.py  +1 -1
src/transformers/models/data2vec/modeling_tf_data2vec_vision.py  +1 -1
src/transformers/models/deformable_detr/image_processing_deformable_detr.py  +1 -1
src/transformers/models/detr/image_processing_detr.py  +1 -1
src/transformers/models/dinat/modeling_dinat.py  +1 -1
src/transformers/models/donut/processing_donut.py  +1 -1
src/transformers/models/ernie_m/tokenization_ernie_m.py  +3 -3
src/transformers/models/esm/modeling_esmfold.py  +2 -2
src/transformers/models/esm/openfold_utils/chunk_utils.py  +1 -1
src/transformers/models/flaubert/tokenization_flaubert.py  +4 -4
src/transformers/models/fsmt/tokenization_fsmt.py  +4 -4
src/transformers/models/gptj/modeling_gptj.py  +1 -1
src/transformers/models/herbert/tokenization_herbert.py  +4 -4
src/transformers/models/jukebox/modeling_jukebox.py  +12 -12
src/transformers/models/jukebox/tokenization_jukebox.py  +1 -1
src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py
@@ -191,7 +191,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
         words = re.findall(r"\S+\n?", text)
         for token in words:
-            split_tokens.extend([t for t in self.bpe(token).split(" ")])
+            split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens
 
     def _convert_token_to_id(self, token: str) -> int:
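The change above is the "unnecessary comprehension" fix: a comprehension that only re-yields its elements is replaced by the builtin constructor. A minimal sketch of the before/after, using a hypothetical `bpe_output` string rather than the tokenizer's real BPE output (and since `list.extend` accepts any iterable, even the `list(...)` wrapper is strictly optional):

    # Hypothetical stand-in for self.bpe(token); the real method returns a space-joined BPE string.
    bpe_output = "hel@@ lo"

    split_tokens = []
    split_tokens.extend([t for t in bpe_output.split(" ")])  # before: comprehension that just copies
    split_tokens.extend(list(bpe_output.split(" ")))         # after: equivalent copy via list()
    assert split_tokens == ["hel@@", "lo", "hel@@", "lo"]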
src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
@@ -89,7 +89,7 @@ def convert_bloom_checkpoint_to_pytorch(
     if shard_model:
         file_names = os.listdir(bloom_checkpoint_path)
-        file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+        file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
         index_dict = {"weight_map": {}, "metadata": {}}
         total_size = 0
@@ -157,7 +157,7 @@ def convert_bloom_checkpoint_to_pytorch(
         model = BloomModel(config)
         file_names = os.listdir(bloom_checkpoint_path)
-        file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+        file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
         missing_keys = None
         for i, file in enumerate(file_names):
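Both hunks drop a redundant `list(...)` around `sorted(...)`: `sorted()` already returns a new list, so the wrapper only added an extra copy. A minimal sketch with hypothetical shard file names (not real checkpoint contents):

    # Hypothetical shard names standing in for os.listdir(bloom_checkpoint_path).
    file_names = [
        "layer_01-model_00-model_states.pt",
        "layer_00-model_00-model_states.pt",
        "mp_rank_00_model_states.pt",
    ]

    before = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
    after = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
    assert before == after == ["layer_00-model_00-model_states.pt", "layer_01-model_00-model_states.pt"]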
src/transformers/models/codegen/modeling_codegen.py
@@ -85,7 +85,7 @@ def duplicate_interleave(m):
 # Copied from transformers.models.gptj.modeling_gptj.apply_rotary_pos_emb
 def apply_rotary_pos_emb(x, sincos, offset=0):
-    sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+    sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
     # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
     return (x * cos) + (rotate_every_two(x) * sin)
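Here `map` with a `lambda` becomes a generator expression; both are lazy two-element iterables, so unpacking into `sin, cos` works the same way. A minimal sketch with toy data and a hypothetical `duplicate_interleave_stub` (not the real rotary-embedding helper):

    # Toy (sin, cos) tables; the real code slices the interleaved tables to the sequence length.
    sincos = ([0.0, 0.1], [1.0, 0.9])

    def duplicate_interleave_stub(t):
        # repeat every element twice, loosely mimicking the real interleaving
        return [v for v in t for _ in range(2)]

    sin, cos = map(lambda t: duplicate_interleave_stub(t), sincos)   # before: map + lambda
    sin2, cos2 = (duplicate_interleave_stub(t) for t in sincos)      # after: generator expression
    assert (sin, cos) == (sin2, cos2) == ([0.0, 0.0, 0.1, 0.1], [1.0, 1.0, 0.9, 0.9])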
src/transformers/models/conditional_detr/image_processing_conditional_detr.py
@@ -604,7 +604,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)
 
 # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
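Here `runs` is a NumPy array, and `list(runs)` iterates it exactly as the comprehension did (the elements stay NumPy scalars either way). A minimal sketch of the same lines on a tiny hypothetical mask:

    import numpy as np

    pixels = np.array([0, 1, 1, 0, 1, 0])  # toy flattened binary mask
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    assert [x for x in runs] == list(runs) == [2, 2, 5, 1]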
src/transformers/models/convnext/modeling_convnext.py
@@ -495,7 +495,7 @@ class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
         self.out_feature_channels = out_feature_channels
 
         # Add layer norms to hidden states of out_features
-        hidden_states_norms = dict()
+        hidden_states_norms = {}
         for stage, num_channels in zip(self.out_features, self.channels):
             hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
         self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
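This and several files below swap the `dict()` call for the `{}` literal, which builds the same empty dict without a builtin name lookup and call. A minimal sketch with placeholder values (not real layer norms):

    # {} is the empty dict literal, equivalent to dict(); note {} is never an empty set.
    hidden_states_norms = {}
    assert hidden_states_norms == dict() and isinstance(hidden_states_norms, dict)
    hidden_states_norms["stage1"] = "placeholder"  # hypothetical key/value
    assert hidden_states_norms == {"stage1": "placeholder"}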
src/transformers/models/ctrl/tokenization_ctrl.py
@@ -208,7 +208,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         words = re.findall(r"\S+\n?", text)
         for token in words:
-            split_tokens.extend([t for t in self.bpe(token).split(" ")])
+            split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens
 
     def _convert_token_to_id(self, token):
src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
@@ -596,7 +596,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer):
         self.relative_position_bias = None
 
         # stochastic depth decay rule
-        dpr = [x for x in tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)]
+        dpr = list(tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers))
         self.layer = [
             TFData2VecVisionLayer(
                 config,
src/transformers/models/deformable_detr/image_processing_deformable_detr.py
@@ -602,7 +602,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)
 
 # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
src/transformers/models/detr/image_processing_detr.py
@@ -590,7 +590,7 @@ def binary_mask_to_rle(mask):
     pixels = np.concatenate([[0], pixels, [0]])
     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
     runs[1::2] -= runs[::2]
-    return [x for x in runs]
+    return list(runs)
 
 # TODO - (Amy) make compatible with other frameworks
src/transformers/models/dinat/modeling_dinat.py
@@ -899,7 +899,7 @@ class DinatBackbone(DinatPreTrainedModel, BackboneMixin):
             self.out_feature_channels[stage] = num_features[i]
 
         # Add layer norms to hidden states of out_features
-        hidden_states_norms = dict()
+        hidden_states_norms = {}
         for stage, num_channels in zip(self.out_features, self.channels):
             hidden_states_norms[stage] = nn.LayerNorm(num_channels)
         self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
src/transformers/models/donut/processing_donut.py
@@ -130,7 +130,7 @@ class DonutProcessor(ProcessorMixin):
         if added_vocab is None:
             added_vocab = self.tokenizer.get_added_vocab()
 
-        output = dict()
+        output = {}
         while tokens:
             start_token = re.search(r"<s_(.*?)>", tokens, re.IGNORECASE)
src/transformers/models/ernie_m/tokenization_ernie_m.py
@@ -133,8 +133,8 @@ class ErnieMTokenizer(PreTrainedTokenizer):
         if vocab_file is not None:
             self.vocab = self.load_vocab(filepath=vocab_file)
         else:
-            self.vocab = dict((self.sp_model.id_to_piece(id), id) for id in range(self.sp_model.get_piece_size()))
-        self.reverse_vocab = dict((v, k) for k, v in self.vocab.items())
+            self.vocab = {self.sp_model.id_to_piece(id): id for id in range(self.sp_model.get_piece_size())}
+        self.reverse_vocab = {v: k for k, v in self.vocab.items()}
 
     def get_offset_mapping(self, text):
         if text is None:
@@ -325,7 +325,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
                     "You should not supply a second sequence if the provided sequence of "
                     "ids is already formatted with special tokens for the model."
                 )
-            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+            return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
 
         if token_ids_1 is not None:
             return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
         return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
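This file covers two related rewrites: `dict(generator)` becomes a dict comprehension, and `list(map(lambda ...))` becomes a list comprehension. A minimal sketch with a toy vocabulary and hypothetical token ids (the real values come from a SentencePiece model):

    pieces = ["<pad>", "<s>", "hello"]  # hypothetical id -> piece mapping

    vocab_before = dict((pieces[i], i) for i in range(len(pieces)))  # dict() around a generator
    vocab_after = {pieces[i]: i for i in range(len(pieces))}         # dict comprehension
    reverse_before = dict((v, k) for k, v in vocab_after.items())
    reverse_after = {v: k for k, v in vocab_after.items()}
    assert vocab_before == vocab_after and reverse_before == reverse_after == {0: "<pad>", 1: "<s>", 2: "hello"}

    token_ids = [3, 0, 7]  # hypothetical ids; 0 stands in for a special token id
    special = [0, 2]
    assert list(map(lambda x: 1 if x in special else 0, token_ids)) == [1 if x in special else 0 for x in token_ids] == [0, 1, 0]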
src/transformers/models/esm/modeling_esmfold.py
@@ -201,9 +201,9 @@ def collate_dense_tensors(samples: List[torch.Tensor], pad_v: float = 0) -> torc
     """
     if len(samples) == 0:
         return torch.Tensor()
-    if len(set(x.dim() for x in samples)) != 1:
+    if len({x.dim() for x in samples}) != 1:
         raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}")
-    (device,) = tuple(set(x.device for x in samples))  # assumes all on same device
+    (device,) = tuple({x.device for x in samples})  # assumes all on same device
     max_shape = [max(lst) for lst in zip(*[x.shape for x in samples])]
     result = torch.empty(len(samples), *max_shape, dtype=samples[0].dtype, device=device)
     result.fill_(pad_v)
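The two changed lines replace `set(generator)` with a set comprehension; the behavior is identical. A minimal sketch with a hypothetical stand-in class so the example runs without torch:

    class FakeTensor:  # hypothetical stand-in for a tensor with a .dim() method
        def __init__(self, ndim):
            self._ndim = ndim

        def dim(self):
            return self._ndim

    samples = [FakeTensor(2), FakeTensor(2), FakeTensor(2)]
    assert len(set(x.dim() for x in samples)) == len({x.dim() for x in samples}) == 1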
src/transformers/models/esm/openfold_utils/chunk_utils.py
@@ -83,7 +83,7 @@ def _get_minimal_slice_set(
     # Base cases. Either start/end are empty and we're done, or the final,
     # one-dimensional tensor can be simply sliced
     if len(start) == 0:
-        return [tuple()]
+        return [()]
     elif len(start) == 1:
         return [(slice(start[0], end[0] + 1),)]
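`()` and `tuple()` build the same empty tuple, so the base case is unchanged; the literal just avoids a builtin name lookup and call. A minimal check:

    assert () == tuple() and [()] == [tuple()]
    assert len([()]) == 1 and [()][0] == ()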
src/transformers/models/flaubert/tokenization_flaubert.py
@@ -282,10 +282,10 @@ class FlaubertTokenizer(PreTrainedTokenizer):
         self.sm = sacremoses
 
         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
-        self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+        self.cache_moses_tokenizer = {}
+        self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
         self.lang2id = lang2id
         self.id2lang = id2lang
         if lang2id is not None and id2lang is not None:
@@ -452,7 +452,7 @@ class FlaubertTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in text:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens
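Besides the empty-dict literals, this hunk turns `set([...])` into a set literal, which avoids building a throwaway list first. A minimal sketch:

    lang_before = set(["zh", "th", "ja"])  # before: list constructed, then converted to a set
    lang_after = {"zh", "th", "ja"}        # after: set literal, same contents
    assert lang_before == lang_after and "zh" in lang_after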
src/transformers/models/fsmt/tokenization_fsmt.py
@@ -226,10 +226,10 @@ class FSMTTokenizer(PreTrainedTokenizer):
         self.do_lower_case = do_lower_case
 
         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
-        self.cache_moses_detokenizer = dict()
+        self.cache_moses_tokenizer = {}
+        self.cache_moses_detokenizer = {}
 
         if langs and len(langs) == 2:
             self.src_lang, self.tgt_lang = langs
@@ -379,7 +379,7 @@ class FSMTTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in text:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens
src/transformers/models/gptj/modeling_gptj.py
@@ -78,7 +78,7 @@ def duplicate_interleave(m):
 def apply_rotary_pos_emb(x, sincos, offset=0):
-    sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+    sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
     # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
     return (x * cos) + (rotate_every_two(x) * sin)
src/transformers/models/herbert/tokenization_herbert.py
@@ -348,10 +348,10 @@ class HerbertTokenizer(PreTrainedTokenizer):
         self.sm = sacremoses
 
         # cache of sm.MosesPunctNormalizer instance
-        self.cache_moses_punct_normalizer = dict()
+        self.cache_moses_punct_normalizer = {}
         # cache of sm.MosesTokenizer instance
-        self.cache_moses_tokenizer = dict()
-        self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+        self.cache_moses_tokenizer = {}
+        self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
 
         # True for current supported model (v1.2.0), False for XLM-17 & 100
         self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent
         self.lang2id = lang2id
@@ -490,7 +490,7 @@ class HerbertTokenizer(PreTrainedTokenizer):
         split_tokens = []
         for token in pre_tokens:
             if token:
-                split_tokens.extend([t for t in self.bpe(token).split(" ")])
+                split_tokens.extend(list(self.bpe(token).split(" ")))
         return split_tokens
src/transformers/models/jukebox/modeling_jukebox.py
@@ -138,7 +138,7 @@ def get_alignment(music_tokens, labels, prior, config):
     hop_length = int(config.hop_fraction[-level - 1] * prior.n_ctx)
     alignment_head, alignment_layer = config.prior_alignment_head[0], config.prior_alignment_layer[0]
-    attn_layers = set([alignment_layer])
+    attn_layers = {alignment_layer}
     alignment_hops = {}
     indices_hops = {}
     for start in tqdm(get_starts(total_length, n_ctx, hop_length), desc="Computing lyric to music alignment "):
@@ -436,7 +436,7 @@ class JukeboxBottleneckBlock(nn.Module):
             used_curr = (_codebook_elem >= self.threshold).sum()
             usage = torch.sum(usage)
             dk = torch.norm(self.codebook - old_codebook) / np.sqrt(np.prod(old_codebook.shape))
-            return dict(entropy=entropy, used_curr=used_curr, usage=usage, dk=dk)
+            return {"entropy": entropy, "used_curr": used_curr, "usage": usage, "dk": dk}
 
     def preprocess(self, hidden_states):
         hidden_states = hidden_states.permute(0, 2, 1).contiguous()
@@ -2213,11 +2213,11 @@ class JukeboxPrior(PreTrainedModel):
             loss = self.encoder_loss_fraction * encoder_loss * self.nb_relevant_lyric_tokens / self.total_loss_dims
             loss += next_token_prediction_loss * self.next_token_prediction_loss_dims / self.total_loss_dims
-        metrics = dict(
-            bpd=next_token_prediction_loss.clone().detach(),
-            encoder_loss=encoder_loss.clone().detach(),
-            next_token_prediction_loss=next_token_prediction_loss.clone().detach(),
-        )
+        metrics = {
+            "bpd": next_token_prediction_loss.clone().detach(),
+            "encoder_loss": encoder_loss.clone().detach(),
+            "next_token_prediction_loss": next_token_prediction_loss.clone().detach(),
+        }
         if get_preds:
             metrics["preds"] = preds.clone().detach()
         if get_attn_weights:
@@ -2533,11 +2533,11 @@ class JukeboxModel(JukeboxPreTrainedModel):
         # total length of the signal, might be bit different from the actual generated length
         self.total_length = total_length
         for level in sample_levels:
-            sampling_kwargs = dict(
-                temp=0.99 if level == len(self.priors) - 1 else sampling_temperature,
-                chunk_size=chunk_size,
-                sample_tokens=sample_tokens,
-            )
+            sampling_kwargs = {
+                "temp": 0.99 if level == len(self.priors) - 1 else sampling_temperature,
+                "chunk_size": chunk_size,
+                "sample_tokens": sample_tokens,
+            }
             # Set correct total_length, hop_length, labels and sampling_kwargs for level
             total_token_to_sample = total_length // self.priors[level].raw_to_tokens
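The larger hunks here replace `dict(key=value, ...)` with a dict literal: the contents are identical, the literal just spells the string keys explicitly and skips the constructor call. A minimal sketch with placeholder floats instead of detached tensors:

    bpd, encoder_loss = 1.23, 0.45  # hypothetical metric values
    metrics_kw = dict(bpd=bpd, encoder_loss=encoder_loss)
    metrics_literal = {"bpd": bpd, "encoder_loss": encoder_loss}
    assert metrics_kw == metrics_literal
    metrics_literal["preds"] = [0.1, 0.2]  # later additions work the same on either form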
src/transformers/models/jukebox/tokenization_jukebox.py
@@ -187,7 +187,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
         Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
         """
         # only lyrics are not tokenized, but character based is easily handled
-        return [character for character in lyrics]
+        return list(lyrics)
 
     def tokenize(self, artist, genre, lyrics, **kwargs):
         """
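`list()` over a string yields its characters, exactly like the character comprehension it replaces. A minimal sketch with a hypothetical lyric string:

    lyrics = "la la"
    assert [character for character in lyrics] == list(lyrics) == ["l", "a", " ", "l", "a"]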