Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
955b2b97
Unverified
Commit
955b2b97
authored
Jun 15, 2021
by
kumapo
Committed by
GitHub
Jun 15, 2021
Browse files
Enable add_prefix_space if model_type is roberta or gpt2 (#12116)
parent
60b1d6b4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
12 deletions
+27
-12
examples/pytorch/token-classification/run_ner.py
examples/pytorch/token-classification/run_ner.py
+20
-7
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+7
-5
No files found.
examples/pytorch/token-classification/run_ner.py
View file @
955b2b97
...
@@ -304,13 +304,26 @@ def main():
...
@@ -304,13 +304,26 @@ def main():
revision
=
model_args
.
model_revision
,
revision
=
model_args
.
model_revision
,
use_auth_token
=
True
if
model_args
.
use_auth_token
else
None
,
use_auth_token
=
True
if
model_args
.
use_auth_token
else
None
,
)
)
tokenizer_name_or_path
=
model_args
.
tokenizer_name
if
model_args
.
tokenizer_name
else
model_args
.
model_name_or_path
if
config
.
model_type
in
{
"gpt2"
,
"roberta"
}:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_args
.
tokenizer_name
if
model_args
.
tokenizer_name
else
model_args
.
model
_name_or_path
,
tokenizer
_name_or_path
,
cache_dir
=
model_args
.
cache_dir
,
cache_dir
=
model_args
.
cache_dir
,
use_fast
=
True
,
use_fast
=
True
,
revision
=
model_args
.
model_revision
,
revision
=
model_args
.
model_revision
,
use_auth_token
=
True
if
model_args
.
use_auth_token
else
None
,
use_auth_token
=
True
if
model_args
.
use_auth_token
else
None
,
add_prefix_space
=
True
,
)
)
else
:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
tokenizer_name_or_path
,
cache_dir
=
model_args
.
cache_dir
,
use_fast
=
True
,
revision
=
model_args
.
model_revision
,
use_auth_token
=
True
if
model_args
.
use_auth_token
else
None
,
)
model
=
AutoModelForTokenClassification
.
from_pretrained
(
model
=
AutoModelForTokenClassification
.
from_pretrained
(
model_args
.
model_name_or_path
,
model_args
.
model_name_or_path
,
from_tf
=
bool
(
".ckpt"
in
model_args
.
model_name_or_path
),
from_tf
=
bool
(
".ckpt"
in
model_args
.
model_name_or_path
),
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
955b2b97
...
@@ -317,16 +317,18 @@ def main():
...
@@ -317,16 +317,18 @@ def main():
config
=
CONFIG_MAPPING
[
args
.
model_type
]()
config
=
CONFIG_MAPPING
[
args
.
model_type
]()
logger
.
warning
(
"You are instantiating a new config instance from scratch."
)
logger
.
warning
(
"You are instantiating a new config instance from scratch."
)
if
args
.
tokenizer_name
:
tokenizer_name_or_path
=
args
.
tokenizer_name
if
args
.
tokenizer_name
else
args
.
model_name_or_path
tokenizer
=
AutoTokenizer
.
from_pretrained
(
args
.
tokenizer_name
,
use_fast
=
True
)
if
not
tokenizer_name_or_path
:
elif
args
.
model_name_or_path
:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
args
.
model_name_or_path
,
use_fast
=
True
)
else
:
raise
ValueError
(
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
)
if
config
.
model_type
in
{
"gpt2"
,
"roberta"
}:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
tokenizer_name_or_path
,
use_fast
=
True
,
add_prefix_space
=
True
)
else
:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
tokenizer_name_or_path
,
use_fast
=
True
)
if
args
.
model_name_or_path
:
if
args
.
model_name_or_path
:
model
=
AutoModelForTokenClassification
.
from_pretrained
(
model
=
AutoModelForTokenClassification
.
from_pretrained
(
args
.
model_name_or_path
,
args
.
model_name_or_path
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment