Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
c60749d6
Unverified
Commit
c60749d6
authored
Apr 25, 2024
by
Arthur
Committed by
GitHub
Apr 25, 2024
Browse files
[fix codellama conversion] (#30472)
* fix codellama conversion * nit
parent
e9b16354
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
2 deletions
+2
-2
src/transformers/convert_slow_tokenizer.py
src/transformers/convert_slow_tokenizer.py
+2
-2
No files found.
src/transformers/convert_slow_tokenizer.py
View file @
c60749d6
...
...
@@ -1395,14 +1395,14 @@ class LlamaConverter(SpmConverter):
def
normalizer
(
self
,
proto
):
if
getattr
(
self
.
original_tokenizer
,
"legacy"
,
True
):
sequence
=
[]
if
getattr
(
self
.
original_tokenizer
,
"add_prefix_space"
):
if
getattr
(
self
.
original_tokenizer
,
"add_prefix_space"
,
True
):
sequence
+=
[
normalizers
.
Prepend
(
prepend
=
"▁"
)]
sequence
+=
[
normalizers
.
Replace
(
pattern
=
" "
,
content
=
"▁"
)]
return
normalizers
.
Sequence
(
sequence
)
return
None
# non-legacy, no normalizer
def
pre_tokenizer
(
self
,
replacement
,
add_prefix_space
):
if
not
self
.
original_tokenizer
.
legacy
:
# non-legacy, we need a replace
if
not
getattr
(
self
.
original_tokenizer
,
"legacy"
,
True
)
:
# non-legacy, we need a replace
prepend_scheme
=
_get_prepend_scheme
(
add_prefix_space
,
self
.
original_tokenizer
)
return
pre_tokenizers
.
Metaspace
(
replacement
=
replacement
,
prepend_scheme
=
prepend_scheme
,
split
=
False
)
return
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment