Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
20b65860
Unverified
Commit
20b65860
authored
Nov 19, 2020
by
Sylvain Gugger
Committed by
GitHub
Nov 19, 2020
Browse files
Fix run_ner script (#8664)
* Fix run_ner script * Pin datasets
parent
ca0109bd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
8 deletions
+19
-8
examples/requirements.txt
examples/requirements.txt
+1
-1
examples/token-classification/run_ner.py
examples/token-classification/run_ner.py
+18
-7
No files found.
examples/requirements.txt
View file @
20b65860
...
@@ -13,7 +13,7 @@ streamlit
...
@@ -13,7 +13,7 @@ streamlit
elasticsearch
elasticsearch
nltk
nltk
pandas
pandas
datasets
datasets
>= 1.1.3
fire
fire
pytest
pytest
conllu
conllu
...
...
examples/token-classification/run_ner.py
View file @
20b65860
...
@@ -15,7 +15,8 @@
...
@@ -15,7 +15,8 @@
"""
"""
Fine-tuning the library models for token classification.
Fine-tuning the library models for token classification.
"""
"""
# You can also adapt this script on your own token classification task and datasets. Pointers for this are left as comments.
# You can also adapt this script on your own token classification task and datasets. Pointers for this are left as
# comments.
import
logging
import
logging
import
os
import
os
...
@@ -24,7 +25,7 @@ from dataclasses import dataclass, field
...
@@ -24,7 +25,7 @@ from dataclasses import dataclass, field
from
typing
import
Optional
from
typing
import
Optional
import
numpy
as
np
import
numpy
as
np
from
datasets
import
load_dataset
from
datasets
import
ClassLabel
,
load_dataset
from
seqeval.metrics
import
accuracy_score
,
f1_score
,
precision_score
,
recall_score
from
seqeval.metrics
import
accuracy_score
,
f1_score
,
precision_score
,
recall_score
import
transformers
import
transformers
...
@@ -198,12 +199,17 @@ def main():
...
@@ -198,12 +199,17 @@ def main():
if
training_args
.
do_train
:
if
training_args
.
do_train
:
column_names
=
datasets
[
"train"
].
column_names
column_names
=
datasets
[
"train"
].
column_names
features
=
datasets
[
"train"
].
features
else
:
else
:
column_names
=
datasets
[
"validation"
].
column_names
column_names
=
datasets
[
"validation"
].
column_names
text_column_name
=
"words"
if
"words"
in
column_names
else
column_names
[
0
]
features
=
datasets
[
"validation"
].
features
label_column_name
=
data_args
.
task_name
if
data_args
.
task_name
in
column_names
else
column_names
[
1
]
text_column_name
=
"tokens"
if
"tokens"
in
column_names
else
column_names
[
0
]
label_column_name
=
(
f
"
{
data_args
.
task_name
}
_tags"
if
f
"
{
data_args
.
task_name
}
_tags"
in
column_names
else
column_names
[
1
]
)
# Labeling (this part will be easier when https://github.com/huggingface/datasets/issues/797 is solved)
# In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
# unique labels.
def
get_label_list
(
labels
):
def
get_label_list
(
labels
):
unique_labels
=
set
()
unique_labels
=
set
()
for
label
in
labels
:
for
label
in
labels
:
...
@@ -212,8 +218,13 @@ def main():
...
@@ -212,8 +218,13 @@ def main():
label_list
.
sort
()
label_list
.
sort
()
return
label_list
return
label_list
label_list
=
get_label_list
(
datasets
[
"train"
][
label_column_name
])
if
isinstance
(
features
[
label_column_name
].
feature
,
ClassLabel
):
label_to_id
=
{
l
:
i
for
i
,
l
in
enumerate
(
label_list
)}
label_list
=
features
[
label_column_name
].
feature
.
names
# No need to convert the labels since they are already ints.
label_to_id
=
{
i
:
i
for
i
in
range
(
len
(
label_list
))}
else
:
label_list
=
get_label_list
(
datasets
[
"train"
][
label_column_name
])
label_to_id
=
{
l
:
i
for
i
,
l
in
enumerate
(
label_list
)}
num_labels
=
len
(
label_list
)
num_labels
=
len
(
label_list
)
# Load pretrained model and tokenizer
# Load pretrained model and tokenizer
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment