Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
d29baf69
Unverified
Commit
d29baf69
authored
Nov 04, 2021
by
Nicolas Patry
Committed by
GitHub
Nov 04, 2021
Browse files
Fixing mishandling of `ignore_labels`. (#14274)
Fixes #14272
parent
68427c9b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
4 deletions
+14
-4
src/transformers/pipelines/token_classification.py
src/transformers/pipelines/token_classification.py
+5
-4
tests/test_pipelines_token_classification.py
tests/test_pipelines_token_classification.py
+9
-0
No files found.
src/transformers/pipelines/token_classification.py
View file @
d29baf69
...
...
@@ -96,7 +96,6 @@ class TokenClassificationPipeline(Pipeline):
default_input_names
=
"sequences"
def
__init__
(
self
,
args_parser
=
TokenClassificationArgumentHandler
(),
*
args
,
**
kwargs
):
self
.
ignore_labels
=
[
"O"
]
super
().
__init__
(
*
args
,
**
kwargs
)
self
.
check_model_type
(
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
...
...
@@ -216,7 +215,9 @@ class TokenClassificationPipeline(Pipeline):
**
model_inputs
,
}
def
postprocess
(
self
,
model_outputs
,
aggregation_strategy
=
AggregationStrategy
.
NONE
):
def
postprocess
(
self
,
model_outputs
,
aggregation_strategy
=
AggregationStrategy
.
NONE
,
ignore_labels
=
None
):
if
ignore_labels
is
None
:
ignore_labels
=
[
"O"
]
logits
=
model_outputs
[
"logits"
][
0
].
numpy
()
sentence
=
model_outputs
[
"sentence"
]
input_ids
=
model_outputs
[
"input_ids"
][
0
]
...
...
@@ -235,8 +236,8 @@ class TokenClassificationPipeline(Pipeline):
entities
=
[
entity
for
entity
in
grouped_entities
if
entity
.
get
(
"entity"
,
None
)
not
in
self
.
ignore_labels
and
entity
.
get
(
"entity_group"
,
None
)
not
in
self
.
ignore_labels
if
entity
.
get
(
"entity"
,
None
)
not
in
ignore_labels
and
entity
.
get
(
"entity_group"
,
None
)
not
in
ignore_labels
]
return
entities
...
...
tests/test_pipelines_token_classification.py
View file @
d29baf69
...
...
@@ -627,6 +627,15 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
],
)
token_classifier
=
pipeline
(
task
=
"token-classification"
,
model
=
model_name
,
framework
=
"pt"
,
ignore_labels
=
[
"O"
,
"I-MISC"
]
)
outputs
=
token_classifier
(
"This is a test !"
)
self
.
assertEqual
(
nested_simplify
(
outputs
),
[],
)
@
require_torch
def
test_pt_ignore_subwords_slow_tokenizer_raises
(
self
):
model_name
=
"sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment