Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ca57b450
Unverified
Commit
ca57b450
authored
Feb 24, 2022
by
Patrick von Platen
Committed by
GitHub
Feb 24, 2022
Browse files
[Unispeech] Fix slow tests (#15818)
* remove soundfile old way of loading audio
* Adapt slow test
parent
35ecf99c
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
32 deletions
+16
-32
tests/unispeech/test_modeling_unispeech.py
tests/unispeech/test_modeling_unispeech.py
+6
-14
tests/unispeech_sat/test_modeling_unispeech_sat.py
tests/unispeech_sat/test_modeling_unispeech_sat.py
+10
-18
No files found.
tests/unispeech/test_modeling_unispeech.py
View file @
ca57b450
...
...
@@ -538,21 +538,13 @@ class UniSpeechRobustModelTest(ModelTesterMixin, unittest.TestCase):
@
slow
class
UniSpeechModelIntegrationTest
(
unittest
.
TestCase
):
def _load_datasamples(self, num_samples):
    """Return the first `num_samples` raw audio arrays from the dummy LibriSpeech split.

    Uses the `datasets` library's automatic audio decoding (the "audio" column)
    instead of the old manual `soundfile.read` path, which this commit removes.

    Args:
        num_samples: number of utterances to load (ids 1272-141231-0000 .. -000{n-1}).

    Returns:
        list of 1-D numpy float arrays, one per utterance, sorted by id.
    """
    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # automatic decoding with librispeech: selecting the "audio" column decodes
    # each file into a dict carrying the raw waveform under "array"
    speech_samples = ds.sort("id").filter(
        lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
    )[:num_samples]["audio"]

    return [x["array"] for x in speech_samples]
def
_load_superb
(
self
,
task
,
num_samples
):
...
...
tests/unispeech_sat/test_modeling_unispeech_sat.py
View file @
ca57b450
...
...
@@ -800,21 +800,13 @@ class UniSpeechSatRobustModelTest(ModelTesterMixin, unittest.TestCase):
@
slow
class
UniSpeechSatModelIntegrationTest
(
unittest
.
TestCase
):
def _load_datasamples(self, num_samples):
    """Return the first `num_samples` raw audio arrays from the dummy LibriSpeech split.

    Uses the `datasets` library's automatic audio decoding (the "audio" column)
    instead of the old manual `soundfile.read` path, which this commit removes.

    Args:
        num_samples: number of utterances to load (ids 1272-141231-0000 .. -000{n-1}).

    Returns:
        list of 1-D numpy float arrays, one per utterance, sorted by id.
    """
    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # automatic decoding with librispeech: selecting the "audio" column decodes
    # each file into a dict carrying the raw waveform under "array"
    speech_samples = ds.sort("id").filter(
        lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
    )[:num_samples]["audio"]

    return [x["array"] for x in speech_samples]
def
_load_superb
(
self
,
task
,
num_samples
):
ds
=
load_dataset
(
"anton-l/superb_dummy"
,
task
,
split
=
"test"
)
...
...
@@ -865,10 +857,10 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
# fmt: off
expected_hidden_states_slice
=
torch
.
tensor
(
[[[
-
0.11
7
2
,
-
0.0
797
],
[
-
0.0012
,
0.02
1
3
]],
[[
-
0.12
25
,
-
0.1
277
],
[
-
0.06
6
8
,
-
0.05
8
5
]]],
[[[
-
0.11
9
2
,
-
0.0
825
],
[
-
0.0012
,
0.023
5
]],
[[
-
0.12
40
,
-
0.1
332
],
[
-
0.06
5
8
,
-
0.05
6
5
]]],
device
=
torch_device
,
)
# fmt: on
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment