Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
f875fb0e
"vscode:/vscode.git/clone" did not exist on "304c6a1e0dcad79f4930858efd6a96f9e46a8fcc"
Unverified
Commit
f875fb0e
authored
Oct 20, 2021
by
Sylvain Gugger
Committed by
GitHub
Oct 20, 2021
Browse files
Fix label attribution in token classification examples (#14055)
parent
31560f63
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
2 deletions
+24
-2
examples/pytorch/token-classification/run_ner.py
examples/pytorch/token-classification/run_ner.py
+12
-1
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+12
-1
No files found.
examples/pytorch/token-classification/run_ner.py
View file @
f875fb0e
...
...
@@ -303,6 +303,14 @@ def main():
     label_to_id = {l: i for i, l in enumerate(label_list)}
     num_labels = len(label_list)
+
+    # Map that sends B-Xxx label to its I-Xxx counterpart
+    b_to_i_label = []
+    for idx, label in enumerate(label_list):
+        if label.startswith("B-") and label.replace("B-", "I-") in label_list:
+            b_to_i_label.append(label_list.index(label.replace("B-", "I-")))
+        else:
+            b_to_i_label.append(idx)
 
     # Load pretrained model and tokenizer
     #
     # Distributed training:
...
...
@@ -385,7 +393,10 @@ def main():
                 # For the other tokens in a word, we set the label to either the current label or -100, depending on
                 # the label_all_tokens flag.
                 else:
-                    label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100)
+                    if data_args.label_all_tokens:
+                        label_ids.append(b_to_i_label[label_to_id[label[word_idx]]])
+                    else:
+                        label_ids.append(-100)
                 previous_word_idx = word_idx
 
             labels.append(label_ids)
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
f875fb0e
...
...
@@ -328,6 +328,14 @@ def main():
     label_to_id = {l: i for i, l in enumerate(label_list)}
     num_labels = len(label_list)
+
+    # Map that sends B-Xxx label to its I-Xxx counterpart
+    b_to_i_label = []
+    for idx, label in enumerate(label_list):
+        if label.startswith("B-") and label.replace("B-", "I-") in label_list:
+            b_to_i_label.append(label_list.index(label.replace("B-", "I-")))
+        else:
+            b_to_i_label.append(idx)
 
     # Load pretrained model and tokenizer
     #
     # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
...
...
@@ -396,7 +404,10 @@ def main():
                 # For the other tokens in a word, we set the label to either the current label or -100, depending on
                 # the label_all_tokens flag.
                 else:
-                    label_ids.append(label_to_id[label[word_idx]] if args.label_all_tokens else -100)
+                    if args.label_all_tokens:
+                        label_ids.append(b_to_i_label[label_to_id[label[word_idx]]])
+                    else:
+                        label_ids.append(-100)
                 previous_word_idx = word_idx
 
             labels.append(label_ids)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment