Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
ComfyUI
Commits
80c45909
"...targets/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "60c6738a39fbf49f80c32f55b4aeec4125b75819"
Commit
80c45909
authored
Jul 06, 2024
by
comfyanonymous
Browse files
Allow specifying the padding token for the tokenizer.
parent
ce649d61
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
8 deletions
+12
-8
comfy/sd1_clip.py
comfy/sd1_clip.py
+12
-8
No files found.
comfy/sd1_clip.py
View file @
80c45909
...
...
@@ -364,7 +364,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
return
embed_out
class
SDTokenizer
:
def
__init__
(
self
,
tokenizer_path
=
None
,
max_length
=
77
,
pad_with_end
=
True
,
embedding_directory
=
None
,
embedding_size
=
768
,
embedding_key
=
'clip_l'
,
tokenizer_class
=
CLIPTokenizer
,
has_start_token
=
True
,
pad_to_max_length
=
True
,
min_length
=
None
):
def
__init__
(
self
,
tokenizer_path
=
None
,
max_length
=
77
,
pad_with_end
=
True
,
embedding_directory
=
None
,
embedding_size
=
768
,
embedding_key
=
'clip_l'
,
tokenizer_class
=
CLIPTokenizer
,
has_start_token
=
True
,
pad_to_max_length
=
True
,
min_length
=
None
,
pad_token
=
None
):
if
tokenizer_path
is
None
:
tokenizer_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
)),
"sd1_tokenizer"
)
self
.
tokenizer
=
tokenizer_class
.
from_pretrained
(
tokenizer_path
)
...
...
@@ -380,6 +380,14 @@ class SDTokenizer:
self
.
tokens_start
=
0
self
.
start_token
=
None
self
.
end_token
=
empty
[
0
]
if
pad_token
is
not
None
:
self
.
pad_token
=
pad_token
elif
pad_with_end
:
self
.
pad_token
=
self
.
end_token
else
:
self
.
pad_token
=
0
self
.
pad_with_end
=
pad_with_end
self
.
pad_to_max_length
=
pad_to_max_length
...
...
@@ -412,10 +420,6 @@ class SDTokenizer:
Word id values are unique per word and embedding, where the id 0 is reserved for non word tokens.
Returned list has the dimensions NxM where M is the input size of CLIP
'''
if
self
.
pad_with_end
:
pad_token
=
self
.
end_token
else
:
pad_token
=
0
text
=
escape_important
(
text
)
parsed_weights
=
token_weights
(
text
,
1.0
)
...
...
@@ -467,7 +471,7 @@ class SDTokenizer:
else
:
batch
.
append
((
self
.
end_token
,
1.0
,
0
))
if
self
.
pad_to_max_length
:
batch
.
extend
([(
pad_token
,
1.0
,
0
)]
*
(
remaining_length
))
batch
.
extend
([(
self
.
pad_token
,
1.0
,
0
)]
*
(
remaining_length
))
#start new batch
batch
=
[]
if
self
.
start_token
is
not
None
:
...
...
@@ -480,9 +484,9 @@ class SDTokenizer:
#fill last batch
batch
.
append
((
self
.
end_token
,
1.0
,
0
))
if
self
.
pad_to_max_length
:
batch
.
extend
([(
pad_token
,
1.0
,
0
)]
*
(
self
.
max_length
-
len
(
batch
)))
batch
.
extend
([(
self
.
pad_token
,
1.0
,
0
)]
*
(
self
.
max_length
-
len
(
batch
)))
if
self
.
min_length
is
not
None
and
len
(
batch
)
<
self
.
min_length
:
batch
.
extend
([(
pad_token
,
1.0
,
0
)]
*
(
self
.
min_length
-
len
(
batch
)))
batch
.
extend
([(
self
.
pad_token
,
1.0
,
0
)]
*
(
self
.
min_length
-
len
(
batch
)))
if
not
return_word_ids
:
batched_tokens
=
[[(
t
,
w
)
for
t
,
w
,
_
in
x
]
for
x
in
batched_tokens
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment