Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
908230d2
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "0c92e7d9fa9f759b410b1ac85cb80f93208669d1"
Commit
908230d2
authored
Jan 24, 2020
by
Lysandre
Browse files
Pickle CamemBERT tokenizer
parent
24d5ad1d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
0 deletions
+18
-0
src/transformers/tokenization_camembert.py
src/transformers/tokenization_camembert.py
+18
-0
No files found.
src/transformers/tokenization_camembert.py
View file @
908230d2
...
@@ -169,6 +169,24 @@ class CamembertTokenizer(PreTrainedTokenizer):
...
@@ -169,6 +169,24 @@ class CamembertTokenizer(PreTrainedTokenizer):
return
self
.
fairseq_ids_to_tokens
[
index
]
return
self
.
fairseq_ids_to_tokens
[
index
]
return
self
.
sp_model
.
IdToPiece
(
index
-
self
.
fairseq_offset
)
return
self
.
sp_model
.
IdToPiece
(
index
-
self
.
fairseq_offset
)
def __getstate__(self):
    """Return the instance state used for pickling, with ``sp_model`` blanked out.

    The result is a new dict built from ``self.__dict__`` (the instance's own
    dict is left untouched) in which the ``"sp_model"`` entry is ``None``.
    ``__setstate__`` re-creates the processor from ``self.vocab_file``.
    """
    # NOTE(review): presumably the SentencePiece processor object cannot be
    # pickled directly, hence the placeholder -- confirm.
    return {**self.__dict__, "sp_model": None}
def __setstate__(self, d):
    """Restore instance state and reload the SentencePiece model.

    Args:
        d: Attribute dictionary that becomes this instance's ``__dict__``.
            It must provide ``vocab_file``, which is passed to
            ``SentencePieceProcessor.Load``.

    Raises:
        ImportError: If ``sentencepiece`` cannot be imported; a warning is
            logged first and the original exception is re-raised.
    """
    self.__dict__ = d

    # Imported lazily so the module itself stays importable without the
    # optional ``sentencepiece`` dependency.
    try:
        import sentencepiece as spm
    except ImportError:
        # Fixed: the message previously named AlbertTokenizer and ran the URL
        # and the pip command together because the literals had no separator.
        logger.warning(
            "You need to install SentencePiece to use CamembertTokenizer: https://github.com/google/sentencepiece "
            "pip install sentencepiece"
        )
        raise

    # The restored state carries ``sp_model = None`` (see ``__getstate__``),
    # so rebuild the processor from the vocabulary file on disk.
    self.sp_model = spm.SentencePieceProcessor()
    self.sp_model.Load(self.vocab_file)
def
convert_tokens_to_string
(
self
,
tokens
):
def
convert_tokens_to_string
(
self
,
tokens
):
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string
=
""
.
join
(
tokens
).
replace
(
SPIECE_UNDERLINE
,
" "
).
strip
()
out_string
=
""
.
join
(
tokens
).
replace
(
SPIECE_UNDERLINE
,
" "
).
strip
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment