Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
e516a34a
Commit
e516a34a
authored
Dec 20, 2019
by
Morgan Funtowicz
Browse files
Use BasicTokenizer to split over whitespaces.
parent
9d0d1cd3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
2 deletions
+9
-2
transformers/pipelines.py
transformers/pipelines.py
+9
-2
No files found.
transformers/pipelines.py
View file @
e516a34a
...
...
@@ -27,7 +27,7 @@ from typing import Union, Optional, Tuple, List, Dict
import
numpy
as
np
from
transformers
import
AutoConfig
,
AutoTokenizer
,
PreTrainedTokenizer
,
PretrainedConfig
,
\
SquadExample
,
squad_convert_examples_to_features
,
is_tf_available
,
is_torch_available
,
logger
SquadExample
,
squad_convert_examples_to_features
,
is_tf_available
,
is_torch_available
,
logger
,
BasicTokenizer
if
is_tf_available
():
import
tensorflow
as
tf
...
...
@@ -416,12 +416,19 @@ class NerPipeline(Pipeline):
Named Entity Recognition pipeline using ModelForTokenClassification head.
"""
def
__init__
(
self
,
model
,
tokenizer
:
PreTrainedTokenizer
=
None
,
args_parser
:
ArgumentHandler
=
None
,
device
:
int
=
-
1
,
binary_output
:
bool
=
False
):
super
().
__init__
(
model
,
tokenizer
,
args_parser
,
device
,
binary_output
)
self
.
_basic_tokenizer
=
BasicTokenizer
(
do_lower_case
=
False
)
def
__call__
(
self
,
*
texts
,
**
kwargs
):
inputs
,
answers
=
self
.
_args_parser
(
*
texts
,
**
kwargs
),
[]
for
sentence
in
inputs
:
# Ugly token to word idx mapping (for now)
token_to_word
,
words
=
[],
sentence
.
split
(
' '
)
token_to_word
,
words
=
[],
self
.
_basic_tokenizer
.
tokenize
(
sentence
)
for
i
,
w
in
enumerate
(
words
):
tokens
=
self
.
tokenizer
.
tokenize
(
w
)
token_to_word
+=
[
i
]
*
len
(
tokens
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment