ComfyUI · Commit 80c45909
Authored Jul 06, 2024 by comfyanonymous
Parent ce649d61

Allow specifying the padding token for the tokenizer.

Showing 1 changed file, comfy/sd1_clip.py, with 12 additions and 8 deletions.

--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -364,7 +364,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
     return embed_out
 
 class SDTokenizer:
-    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None):
+    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None, pad_token=None):
         if tokenizer_path is None:
             tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
         self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path)
@@ -380,6 +380,14 @@ class SDTokenizer:
             self.tokens_start = 0
             self.start_token = None
             self.end_token = empty[0]
 
+        if pad_token is not None:
+            self.pad_token = pad_token
+        elif pad_with_end:
+            self.pad_token = self.end_token
+        else:
+            self.pad_token = 0
+
         self.pad_with_end = pad_with_end
         self.pad_to_max_length = pad_to_max_length
@@ -412,10 +420,6 @@ class SDTokenizer:
         Word id values are unique per word and embedding, where the id 0 is reserved for non word tokens.
         Returned list has the dimensions NxM where M is the input size of CLIP
         '''
-        if self.pad_with_end:
-            pad_token = self.end_token
-        else:
-            pad_token = 0
 
         text = escape_important(text)
         parsed_weights = token_weights(text, 1.0)
@@ -467,7 +471,7 @@ class SDTokenizer:
                    else:
                        batch.append((self.end_token, 1.0, 0))
                        if self.pad_to_max_length:
-                            batch.extend([(pad_token, 1.0, 0)] * (remaining_length))
+                            batch.extend([(self.pad_token, 1.0, 0)] * (remaining_length))
                    #start new batch
                    batch = []
                    if self.start_token is not None:
@@ -480,9 +484,9 @@ class SDTokenizer:
         #fill last batch
         batch.append((self.end_token, 1.0, 0))
         if self.pad_to_max_length:
-            batch.extend([(pad_token, 1.0, 0)] * (self.max_length - len(batch)))
+            batch.extend([(self.pad_token, 1.0, 0)] * (self.max_length - len(batch)))
         if self.min_length is not None and len(batch) < self.min_length:
-            batch.extend([(pad_token, 1.0, 0)] * (self.min_length - len(batch)))
+            batch.extend([(self.pad_token, 1.0, 0)] * (self.min_length - len(batch)))
 
         if not return_word_ids:
             batched_tokens = [[(t, w) for t, w, _ in x] for x in batched_tokens]
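Note on the change: the pad token is now resolved once in __init__ and stored as self.pad_token, instead of being recomputed from pad_with_end inside tokenize_with_weights (the block deleted in the third hunk). Below is a minimal, standalone sketch of the new resolution order. The resolve_pad_token helper is hypothetical, written only for illustration, and 49407 (the CLIP tokenizer's <|endoftext|> id) is just an example end token; neither appears in the commit itself.

def resolve_pad_token(pad_token=None, pad_with_end=True, end_token=49407):
    # Mirrors the constructor logic added in this commit:
    # an explicit pad_token wins, then pad_with_end, then token id 0.
    if pad_token is not None:
        return pad_token
    elif pad_with_end:
        return end_token
    else:
        return 0

assert resolve_pad_token() == 49407                # old default: pad with end token
assert resolve_pad_token(pad_with_end=False) == 0  # old fallback: pad with 0
assert resolve_pad_token(pad_token=1) == 1         # new: explicit override
assert resolve_pad_token(pad_token=0) == 0         # 0 can now be forced even when pad_with_end=True

With the keyword in place, a subclass for a model whose pad token differs from its end token can pass it straight through. MyClipTokenizer below is an assumed example; only the pad_token keyword itself comes from this commit.

from comfy import sd1_clip

class MyClipTokenizer(sd1_clip.SDTokenizer):
    # Hypothetical tokenizer that always pads with token id 0,
    # regardless of the pad_with_end default.
    def __init__(self, embedding_directory=None):
        super().__init__(embedding_directory=embedding_directory, pad_token=0)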