chenpangpang / ComfyUI · Commit 10c919f4

Authored Jul 24, 2024 by comfyanonymous

Make it possible to load tokenizer data from checkpoints.

Parent: ce80e69f
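This commit threads a new tokenizer_data argument from checkpoint loading down through the CLIP wrapper and every tokenizer class, so that tokenizer models embedded in a checkpoint's state dict can reach the tokenizer constructors. A condensed sketch of the new flow, assembled from the hunks below (everything except the changed calls is elided):

# Condensed from the hunks below; surrounding code elided.
# `sd` is the checkpoint state dict loaded in load_checkpoint_guess_config.
clip_sd = model_config.process_clip_state_dict(sd)
if len(clip_sd) > 0:
    # New: the state dict is handed to the CLIP wrapper as tokenizer_data...
    clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd)
    # ...which CLIP.__init__ forwards verbatim to the tokenizer class:
    # self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)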
Showing 8 changed files with 26 additions and 31 deletions (+26 -31)
comfy/sd.py                              +3 -3
comfy/sd1_clip.py                        +3 -3
comfy/sd2_clip.py                        +3 -3
comfy/sdxl_clip.py                       +5 -5
comfy/text_encoders/aura_t5.py           +3 -3
comfy/text_encoders/sa_t5.py             +3 -3
comfy/text_encoders/sd3_clip.py          +2 -11
comfy/text_encoders/spiece_tokenizer.py  +4 -0
comfy/sd.py

@@ -60,7 +60,7 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip):

 class CLIP:
-    def __init__(self, target=None, embedding_directory=None, no_init=False):
+    def __init__(self, target=None, embedding_directory=None, no_init=False, tokenizer_data={}):
         if no_init:
             return
         params = target.params.copy()
@@ -79,7 +79,7 @@ class CLIP:
         if not model_management.supports_cast(load_device, dt):
             load_device = offload_device
-        self.tokenizer = tokenizer(embedding_directory=embedding_directory)
+        self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
         self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
         self.layer_idx = None
         logging.debug("CLIP model load device: {}, offload device: {}".format(load_device, offload_device))
@@ -520,7 +520,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     if clip_target is not None:
         clip_sd = model_config.process_clip_state_dict(sd)
         if len(clip_sd) > 0:
-            clip = CLIP(clip_target, embedding_directory=embedding_directory)
+            clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd)
             m, u = clip.load_sd(clip_sd, full_model=True)
             if len(m) > 0:
                 m_filter = list(filter(lambda a: ".logit_scale" not in a and ".transformer.text_projection.weight" not in a, m))
comfy/sd1_clip.py

@@ -386,7 +386,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
     return embed_out

 class SDTokenizer:
-    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None, pad_token=None):
+    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None, pad_token=None, tokenizer_data={}):
         if tokenizer_path is None:
             tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
         self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path)
@@ -521,10 +521,10 @@ class SDTokenizer:

 class SD1Tokenizer:
-    def __init__(self, embedding_directory=None, clip_name="l", tokenizer=SDTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", tokenizer=SDTokenizer):
         self.clip_name = clip_name
         self.clip = "clip_{}".format(self.clip_name)
-        setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory))
+        setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data))

     def tokenize_with_weights(self, text:str, return_word_ids=False):
         out = {}
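Note where tokenizer_data lands in SD1Tokenizer's signature: before clip_name and tokenizer rather than at the end, so any caller passing those two positionally would break. Every subclass updated in this commit (SD2Tokenizer, StableCascadeTokenizer, AuraT5Tokenizer, SAT5Tokenizer) passes them as keywords. A minimal usage sketch, where clip_sd is a hypothetical stand-in for a checkpoint's CLIP state dict:

# Hypothetical usage; clip_sd stands in for a checkpoint's CLIP state dict.
tok = SD1Tokenizer(embedding_directory="embeddings", tokenizer_data=clip_sd)
out = tok.tokenize_with_weights("a photo of a cat", return_word_ids=False)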
comfy/sd2_clip.py

@@ -11,12 +11,12 @@ class SD2ClipHModel(sd1_clip.SDClipModel):
         super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0})

 class SD2ClipHTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024)

 class SD2Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="h", tokenizer=SD2ClipHTokenizer)

 class SD2ClipModel(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/sdxl_clip.py

@@ -16,12 +16,12 @@ class SDXLClipG(sd1_clip.SDClipModel):
         return super().load_sd(sd)

 class SDXLClipGTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g')

 class SDXLTokenizer:
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory)
         self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory)
@@ -68,12 +68,12 @@ class SDXLRefinerClipModel(sd1_clip.SD1ClipModel):

 class StableCascadeClipGTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=True, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g')

 class StableCascadeTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="g", tokenizer=StableCascadeClipGTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="g", tokenizer=StableCascadeClipGTokenizer)

 class StableCascadeClipG(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", max_length=77, freeze=True, layer="hidden", layer_idx=-1, dtype=None):
comfy/text_encoders/aura_t5.py

@@ -9,13 +9,13 @@ class PT5XlModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 2, "pad": 1}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)

 class PT5XlTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_pile_tokenizer"), "tokenizer.model")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='pile_t5xl', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=256, pad_token=1)

 class AuraT5Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="pile_t5xl", tokenizer=PT5XlTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="pile_t5xl", tokenizer=PT5XlTokenizer)

 class AuraT5Model(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/text_encoders/sa_t5.py

@@ -9,13 +9,13 @@ class T5BaseModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)

 class T5BaseTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=768, embedding_key='t5base', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=128)

 class SAT5Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="t5base", tokenizer=T5BaseTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5base", tokenizer=T5BaseTokenizer)

 class SAT5Model(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/text_encoders/sd3_clip.py

@@ -13,22 +13,13 @@ class T5XXLModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5)

 class T5XXLTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=77)

-class SDT5XXLTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
-
-class SDT5XXLModel(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, **kwargs):
-        super().__init__(device=device, dtype=dtype, clip_name="t5xxl", clip_model=T5XXLModel, **kwargs)
-
 class SD3Tokenizer:
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory)
         self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory)
         self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
comfy/text_encoders/spiece_tokenizer.py

 import os
+import torch

 class SPieceTokenizer:
     add_eos = True
@@ -9,6 +10,9 @@ class SPieceTokenizer:
     def __init__(self, tokenizer_path):
         import sentencepiece
+        if torch.is_tensor(tokenizer_path):
+            tokenizer_path = tokenizer_path.numpy().tobytes()
+
         if isinstance(tokenizer_path, bytes):
             self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_eos=self.add_eos)
         else:
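The SPieceTokenizer change is what makes checkpoint-embedded tokenizers usable: a sentencepiece model stored in the state dict as a raw byte tensor is converted back to bytes and handed to SentencePieceProcessor via model_proto instead of a file path. A standalone sketch of that round-trip; the file name and tensor construction are illustrative, while the numpy().tobytes() and model_proto calls mirror the committed code:

# Sketch of the bytes round-trip added above. The source file and the
# tensor construction are illustrative; only the decoding path mirrors
# the committed code.
import torch
import sentencepiece

with open("tokenizer.model", "rb") as f:  # any sentencepiece model file
    proto_bytes = f.read()

# A checkpoint can carry the serialized model as a uint8 tensor...
stored = torch.frombuffer(bytearray(proto_bytes), dtype=torch.uint8)

# ...and the tokenizer can now be rebuilt from that tensor without
# touching the filesystem:
restored = stored.numpy().tobytes()
sp = sentencepiece.SentencePieceProcessor(model_proto=restored, add_eos=True)
print(sp.encode("a photo of a cat"))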