Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
15185084
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b65df514d10cbaf16de9589a520662c178368d80"
Unverified
Commit
15185084
authored
May 22, 2024
by
Jonatan Kłosko
Committed by
GitHub
May 22, 2024
Browse files
Avoid extra chunk in speech recognition (#29539)
parent
24d2a5e1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
6 deletions
+5
-6
src/transformers/pipelines/automatic_speech_recognition.py
src/transformers/pipelines/automatic_speech_recognition.py
+1
-2
tests/pipelines/test_pipelines_automatic_speech_recognition.py
.../pipelines/test_pipelines_automatic_speech_recognition.py
+4
-4
No files found.
src/transformers/pipelines/automatic_speech_recognition.py
View file @
15185084
...
@@ -67,8 +67,7 @@ def chunk_iter(inputs, feature_extractor, chunk_len, stride_left, stride_right,
...
@@ -67,8 +67,7 @@ def chunk_iter(inputs, feature_extractor, chunk_len, stride_left, stride_right,
if
dtype
is
not
None
:
if
dtype
is
not
None
:
processed
=
processed
.
to
(
dtype
=
dtype
)
processed
=
processed
.
to
(
dtype
=
dtype
)
_stride_left
=
0
if
chunk_start_idx
==
0
else
stride_left
_stride_left
=
0
if
chunk_start_idx
==
0
else
stride_left
# all right strides must be full, otherwise it is the last item
is_last
=
chunk_end_idx
>=
inputs_len
is_last
=
chunk_end_idx
>
inputs_len
if
stride_right
>
0
else
chunk_end_idx
>=
inputs_len
_stride_right
=
0
if
is_last
else
stride_right
_stride_right
=
0
if
is_last
else
stride_right
chunk_len
=
chunk
.
shape
[
0
]
chunk_len
=
chunk
.
shape
[
0
]
...
...
tests/pipelines/test_pipelines_automatic_speech_recognition.py
View file @
15185084
...
@@ -1569,10 +1569,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
...
@@ -1569,10 +1569,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
"input_values"
"input_values"
]
]
outs
=
list
(
chunk_iter
(
inputs
,
feature_extractor
,
100
,
20
,
10
))
outs
=
list
(
chunk_iter
(
inputs
,
feature_extractor
,
100
,
20
,
10
))
self
.
assertEqual
(
len
(
outs
),
2
)
self
.
assertEqual
(
len
(
outs
),
1
)
self
.
assertEqual
([
o
[
"stride"
]
for
o
in
outs
],
[(
100
,
0
,
10
),
(
30
,
20
,
0
)])
self
.
assertEqual
([
o
[
"stride"
]
for
o
in
outs
],
[(
100
,
0
,
0
)])
self
.
assertEqual
([
o
[
"input_values"
].
shape
for
o
in
outs
],
[(
1
,
100
)
,
(
1
,
30
)
])
self
.
assertEqual
([
o
[
"input_values"
].
shape
for
o
in
outs
],
[(
1
,
100
)])
self
.
assertEqual
([
o
[
"is_last"
]
for
o
in
outs
],
[
False
,
True
])
self
.
assertEqual
([
o
[
"is_last"
]
for
o
in
outs
],
[
True
])
outs
=
list
(
chunk_iter
(
inputs
,
feature_extractor
,
80
,
20
,
10
))
outs
=
list
(
chunk_iter
(
inputs
,
feature_extractor
,
80
,
20
,
10
))
self
.
assertEqual
(
len
(
outs
),
2
)
self
.
assertEqual
(
len
(
outs
),
2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment