chenpangpang / ComfyUI · Commit 719c26c3

Authored Apr 15, 2023 by comfyanonymous

Merge branch 'master' of https://github.com/BlenderNeko/ComfyUI

Parents: a4049989, d0b1b6c6
Showing 2 changed files with 87 additions and 44 deletions (+87 −44)
comfy/sd.py        +8  −2
comfy/sd1_clip.py  +79 −42
comfy/sd.py  (view file @ 719c26c3)
...
...
@@ -372,9 +372,15 @@ class CLIP:
     def clip_layer(self, layer_idx):
         self.layer_idx = layer_idx

-    def encode(self, text):
+    def tokenize(self, text, return_word_ids=False):
+        return self.tokenizer.tokenize_with_weights(text, return_word_ids)
+
+    def encode(self, text, from_tokens=False):
         if self.layer_idx is not None:
             self.cond_stage_model.clip_layer(self.layer_idx)
-        tokens = self.tokenizer.tokenize_with_weights(text)
+        if from_tokens:
+            tokens = text
+        else:
+            tokens = self.tokenizer.tokenize_with_weights(text)
         try:
             self.patcher.patch_model()
...
...
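Note: the comfy/sd.py change splits tokenization out of CLIP.encode, so a caller can tokenize a prompt once and feed the result back through encode(..., from_tokens=True). A minimal usage sketch, assuming a comfy.sd.CLIP instance obtained elsewhere (e.g. from a loaded checkpoint; the `clip` object and prompt are illustrative, not part of this diff):

    # assumes `clip` is a comfy.sd.CLIP instance loaded elsewhere (illustrative)
    cond = clip.encode("a photo of a cat")          # old path: encode tokenizes internally

    tokens = clip.tokenize("a photo of a cat")      # new: wraps tokenize_with_weights
    cond = clip.encode(tokens, from_tokens=True)    # new: reuse the precomputed tokens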
comfy/sd1_clip.py  (view file @ 719c26c3)
...
...
@@ -260,60 +260,97 @@ class SD1Tokenizer:
         self.inv_vocab = {v: k for k, v in vocab.items()}
         self.embedding_directory = embedding_directory
         self.max_word_length = 8
+        self.embedding_identifier = "embedding:"

-    def tokenize_with_weights(self, text):
-        text = escape_important(text)
-        parsed_weights = token_weights(text, 1.0)
-
-        tokens = []
-        for t in parsed_weights:
-            to_tokenize = unescape_important(t[0]).replace("\n", " ").split(' ')
-            while len(to_tokenize) > 0:
-                word = to_tokenize.pop(0)
-                temp_tokens = []
-                embedding_identifier = "embedding:"
-                if word.startswith(embedding_identifier) and self.embedding_directory is not None:
-                    embedding_name = word[len(embedding_identifier):].strip('\n')
-                    embed = load_embed(embedding_name, self.embedding_directory)
-                    if embed is None:
-                        stripped = embedding_name.strip(',')
-                        if len(stripped) < len(embedding_name):
-                            embed = load_embed(stripped, self.embedding_directory)
-                            if embed is not None:
-                                to_tokenize.insert(0, embedding_name[len(stripped):])
-                    if embed is not None:
-                        if len(embed.shape) == 1:
-                            temp_tokens += [(embed, t[1])]
-                        else:
-                            for x in range(embed.shape[0]):
-                                temp_tokens += [(embed[x], t[1])]
-                    else:
-                        print("warning, embedding:{} does not exist, ignoring".format(embedding_name))
-                elif len(word) > 0:
-                    tt = self.tokenizer(word)["input_ids"][1:-1]
-                    for x in tt:
-                        temp_tokens += [(x, t[1])]
-                tokens_left = self.max_tokens_per_section - (len(tokens) % self.max_tokens_per_section)
-                #try not to split words in different sections
-                if tokens_left < len(temp_tokens) and len(temp_tokens) < (self.max_word_length):
-                    for x in range(tokens_left):
-                        tokens += [(self.end_token, 1.0)]
-                tokens += temp_tokens
-
-        out_tokens = []
-        for x in range(0, len(tokens), self.max_tokens_per_section):
-            o_token = [(self.start_token, 1.0)] + tokens[x:min(self.max_tokens_per_section + x, len(tokens))]
-            o_token += [(self.end_token, 1.0)]
-            if self.pad_with_end:
-                o_token += [(self.end_token, 1.0)] * (self.max_length - len(o_token))
-            else:
-                o_token += [(0, 1.0)] * (self.max_length - len(o_token))
-
-            out_tokens += [o_token]
-
-        return out_tokens
+    def _try_get_embedding(self, embedding_name:str):
+        '''
+        Takes a potential embedding name and tries to retrieve it.
+        Returns a Tuple consisting of the embedding and any leftover string, embedding can be None.
+        '''
+        embed = load_embed(embedding_name, self.embedding_directory)
+        if embed is None:
+            stripped = embedding_name.strip(',')
+            if len(stripped) < len(embedding_name):
+                embed = load_embed(stripped, self.embedding_directory)
+                return (embed, embedding_name[len(stripped):])
+        return (embed, "")
+
+    def tokenize_with_weights(self, text:str, return_word_ids=False):
+        '''
+        Takes a prompt and converts it to a list of (token, weight, word id) elements.
+        Tokens can both be integer tokens and pre computed CLIP tensors.
+        Word id values are unique per word and embedding, where the id 0 is reserved for non word tokens.
+        Returned list has the dimensions NxM where M is the input size of CLIP
+        '''
+        if self.pad_with_end:
+            pad_token = self.end_token
+        else:
+            pad_token = 0
+
+        text = escape_important(text)
+        parsed_weights = token_weights(text, 1.0)
+
+        #tokenize words
+        tokens = []
+        for weighted_segment, weight in parsed_weights:
+            to_tokenize = unescape_important(weighted_segment).replace("\n", " ").split(' ')
+            to_tokenize = [x for x in to_tokenize if x != ""]
+            for word in to_tokenize:
+                #if we find an embedding, deal with the embedding
+                if word.startswith(self.embedding_identifier) and self.embedding_directory is not None:
+                    embedding_name = word[len(self.embedding_identifier):].strip('\n')
+                    embed, leftover = self._try_get_embedding(embedding_name)
+                    if embed is None:
+                        print(f"warning, embedding:{embedding_name} does not exist, ignoring")
+                    else:
+                        if len(embed.shape) == 1:
+                            tokens.append([(embed, weight)])
+                        else:
+                            tokens.append([(embed[x], weight) for x in range(embed.shape[0])])
+                    #if we accidentally have leftover text, continue parsing using leftover, else move on to next word
+                    if leftover != "":
+                        word = leftover
+                    else:
+                        continue
+                #parse word
+                tokens.append([(t, weight) for t in self.tokenizer(word)["input_ids"][1:-1]])
+
+        #reshape token array to CLIP input size
+        batched_tokens = []
+        batch = [(self.start_token, 1.0, 0)]
+        batched_tokens.append(batch)
+        for i, t_group in enumerate(tokens):
+            #determine if we're going to try and keep the tokens in a single batch
+            is_large = len(t_group) >= self.max_word_length
+
+            while len(t_group) > 0:
+                if len(t_group) + len(batch) > self.max_length - 1:
+                    remaining_length = self.max_length - len(batch) - 1
+                    #break word in two and add end token
+                    if is_large:
+                        batch.extend([(t,w,i+1) for t,w in t_group[:remaining_length]])
+                        batch.append((self.end_token, 1.0, 0))
+                        t_group = t_group[remaining_length:]
+                    #add end token and pad
+                    else:
+                        batch.append((self.end_token, 1.0, 0))
+                        batch.extend([(pad_token, 1.0, 0)] * (remaining_length))
+                    #start new batch
+                    batch = [(self.start_token, 1.0, 0)]
+                    batched_tokens.append(batch)
+                else:
+                    batch.extend([(t,w,i+1) for t,w in t_group])
+                    t_group = []
+
+        #fill last batch
+        batch.extend([(self.end_token, 1.0, 0)] + [(pad_token, 1.0, 0)] * (self.max_length - len(batch) - 1))
+
+        if not return_word_ids:
+            batched_tokens = [[(t, w) for t, w, _ in x] for x in batched_tokens]
+
+        return batched_tokens

     def untokenize(self, token_weight_pair):
         return list(map(lambda a: (a, self.inv_vocab[a[0]]), token_weight_pair))
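Note: for illustration, a sketch of the new tokenizer output, under the assumptions that SD1Tokenizer is constructed with an embedding_directory and that max_length is the CLIP input size of 77 (the constructor call, prompt text, and path below are made up, not part of this diff):

    # illustrative only; the constructor arguments shown are assumptions
    tokenizer = SD1Tokenizer(embedding_directory="models/embeddings")

    # default: a list of batches, each a max_length-long list of (token, weight) pairs,
    # padded with the end token (or 0 when pad_with_end is False)
    batches = tokenizer.tokenize_with_weights("(masterpiece:1.2) a photo of a cat")

    # with word ids: (token, weight, word_id) triples; id 0 is reserved for
    # start/end/padding tokens, and each word or embedding gets its own id
    batches = tokenizer.tokenize_with_weights("embedding:my_embed, a cat", return_word_ids=True)
    token, weight, word_id = batches[0][1]

A word such as "embedding:my_embed," is resolved through _try_get_embedding, which retries with the trailing comma stripped and returns the leftover text so the comma is then tokenized normally.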