Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ec5d6c6a
"test/vscode:/vscode.git/clone" did not exist on "47488cc3538c6232382e3bf619d13c8ec8eda513"
Commit
ec5d6c6a
authored
Dec 19, 2019
by
Morgan Funtowicz
Browse files
Addressing issue with NER task omitting first and last word.
parent
d0724d07
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
transformers/pipelines.py
transformers/pipelines.py
+4
-4
No files found.
transformers/pipelines.py
View file @
ec5d6c6a
...
...
@@ -318,8 +318,6 @@ class NerPipeline(Pipeline):
"""
Named Entity Recognition pipeline using ModelForTokenClassification head.
"""
def
__init__
(
self
,
model
,
tokenizer
:
PreTrainedTokenizer
):
super
().
__init__
(
model
,
tokenizer
)
def
__call__
(
self
,
*
texts
,
**
kwargs
):
inputs
,
answers
=
self
.
_args_parser
(
*
texts
,
**
kwargs
),
[]
...
...
@@ -344,14 +342,16 @@ class NerPipeline(Pipeline):
# Normalize scores
answer
,
token_start
=
[],
1
for
idx
,
word
in
groupby
(
token_to_word
[
1
:
-
1
]
):
for
idx
,
word
in
groupby
(
token_to_word
):
# Sum log prob over token, then normalize across labels
score
=
np
.
exp
(
entities
[
token_start
])
/
np
.
exp
(
entities
[
token_start
]).
sum
(
-
1
,
keepdims
=
True
)
label_idx
=
score
.
argmax
()
answer
+=
[{
'word'
:
words
[
idx
-
1
],
'score'
:
score
[
label_idx
].
item
(),
'entity'
:
self
.
model
.
config
.
id2label
[
label_idx
]
'word'
:
words
[
idx
],
'score'
:
score
[
label_idx
].
item
(),
'entity'
:
self
.
model
.
config
.
id2label
[
label_idx
]
}]
# Update token start
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment