Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
c9184a2e
Unverified
Commit
c9184a2e
authored
Sep 02, 2021
by
Nicolas Patry
Committed by
GitHub
Sep 02, 2021
Browse files
Enabling automatic loading of tokenizer with `pipeline` for `audio-classification`. (#13376)
parent
e92140c5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
7 deletions
+10
-7
src/transformers/pipelines/__init__.py
src/transformers/pipelines/__init__.py
+7
-0
tests/test_pipelines_audio_classification.py
tests/test_pipelines_audio_classification.py
+3
-7
No files found.
src/transformers/pipelines/__init__.py
View file @
c9184a2e
...
@@ -449,6 +449,13 @@ def pipeline(
...
@@ -449,6 +449,13 @@ def pipeline(
load_tokenizer
=
type
(
model_config
)
in
TOKENIZER_MAPPING
or
model_config
.
tokenizer_class
is
not
None
load_tokenizer
=
type
(
model_config
)
in
TOKENIZER_MAPPING
or
model_config
.
tokenizer_class
is
not
None
load_feature_extractor
=
type
(
model_config
)
in
FEATURE_EXTRACTOR_MAPPING
or
feature_extractor
is
not
None
load_feature_extractor
=
type
(
model_config
)
in
FEATURE_EXTRACTOR_MAPPING
or
feature_extractor
is
not
None
if
task
in
{
"audio-classification"
}:
# Audio classification will never require a tokenizer.
# The model, on the other hand, might have a tokenizer, but
# its files could be missing from the hub. Instead of failing
# on such repos, we simply force the tokenizer not to be loaded.
load_tokenizer
=
False
if
load_tokenizer
:
if
load_tokenizer
:
# Try to infer tokenizer from model or config name (if provided as str)
# Try to infer tokenizer from model or config name (if provided as str)
if
tokenizer
is
None
:
if
tokenizer
is
None
:
...
...
tests/test_pipelines_audio_classification.py
View file @
c9184a2e
...
@@ -16,7 +16,7 @@ import unittest
...
@@ -16,7 +16,7 @@ import unittest
import
numpy
as
np
import
numpy
as
np
from
transformers
import
MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
,
PreTrainedTokenizer
from
transformers
import
MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
from
transformers.pipelines
import
AudioClassificationPipeline
,
pipeline
from
transformers.pipelines
import
AudioClassificationPipeline
,
pipeline
from
transformers.testing_utils
import
(
from
transformers.testing_utils
import
(
is_pipeline_test
,
is_pipeline_test
,
...
@@ -77,9 +77,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
...
@@ -77,9 +77,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
def
test_small_model_pt
(
self
):
def
test_small_model_pt
(
self
):
model
=
"anton-l/wav2vec2-random-tiny-classifier"
model
=
"anton-l/wav2vec2-random-tiny-classifier"
# hack: dummy tokenizer is required to prevent pipeline from failing
audio_classifier
=
pipeline
(
"audio-classification"
,
model
=
model
)
tokenizer
=
PreTrainedTokenizer
()
audio_classifier
=
pipeline
(
"audio-classification"
,
model
=
model
,
tokenizer
=
tokenizer
)
audio
=
np
.
ones
((
8000
,))
audio
=
np
.
ones
((
8000
,))
output
=
audio_classifier
(
audio
,
top_k
=
4
)
output
=
audio_classifier
(
audio
,
top_k
=
4
)
...
@@ -101,9 +99,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
...
@@ -101,9 +99,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
model
=
"superb/wav2vec2-base-superb-ks"
model
=
"superb/wav2vec2-base-superb-ks"
# hack: dummy tokenizer is required to prevent pipeline from failing
audio_classifier
=
pipeline
(
"audio-classification"
,
model
=
model
)
tokenizer
=
PreTrainedTokenizer
()
audio_classifier
=
pipeline
(
"audio-classification"
,
model
=
model
,
tokenizer
=
tokenizer
)
dataset
=
datasets
.
load_dataset
(
"anton-l/superb_dummy"
,
"ks"
,
split
=
"test"
)
dataset
=
datasets
.
load_dataset
(
"anton-l/superb_dummy"
,
"ks"
,
split
=
"test"
)
audio
=
np
.
array
(
dataset
[
3
][
"speech"
],
dtype
=
np
.
float32
)
audio
=
np
.
array
(
dataset
[
3
][
"speech"
],
dtype
=
np
.
float32
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment