chenpangpang/ComfyUI
Commit 10c919f4
Authored Jul 24, 2024 by comfyanonymous
Parent: ce80e69f

Make it possible to load tokenizer data from checkpoints.
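In brief, the commit threads a new tokenizer_data keyword from checkpoint loading down into every tokenizer constructor. A condensed sketch of the flow, using the names that appear in the diffs below (the CLIP state dict extracted from the checkpoint is reused as the tokenizer data):

    # Sketch only: the CLIP state dict pulled out of a checkpoint is passed
    # through as tokenizer_data, so tokenizers can read tokenizer files or
    # serialized protos stored inside the checkpoint itself.
    clip_sd = model_config.process_clip_state_dict(sd)
    clip = CLIP(clip_target, embedding_directory=embedding_directory,
                tokenizer_data=clip_sd)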
Showing 8 changed files with 26 additions and 31 deletions (+26 -31).
comfy/sd.py                              +3 -3
comfy/sd1_clip.py                        +3 -3
comfy/sd2_clip.py                        +3 -3
comfy/sdxl_clip.py                       +5 -5
comfy/text_encoders/aura_t5.py           +3 -3
comfy/text_encoders/sa_t5.py             +3 -3
comfy/text_encoders/sd3_clip.py          +2 -11
comfy/text_encoders/spiece_tokenizer.py  +4 -0
comfy/sd.py

@@ -60,7 +60,7 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
 
 class CLIP:
-    def __init__(self, target=None, embedding_directory=None, no_init=False):
+    def __init__(self, target=None, embedding_directory=None, no_init=False, tokenizer_data={}):
         if no_init:
             return
         params = target.params.copy()
@@ -79,7 +79,7 @@ class CLIP:
             if not model_management.supports_cast(load_device, dt):
                 load_device = offload_device
 
-        self.tokenizer = tokenizer(embedding_directory=embedding_directory)
+        self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
         self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
         self.layer_idx = None
         logging.debug("CLIP model load device: {}, offload device: {}".format(load_device, offload_device))
@@ -520,7 +520,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     if clip_target is not None:
         clip_sd = model_config.process_clip_state_dict(sd)
         if len(clip_sd) > 0:
-            clip = CLIP(clip_target, embedding_directory=embedding_directory)
+            clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd)
             m, u = clip.load_sd(clip_sd, full_model=True)
             if len(m) > 0:
                 m_filter = list(filter(lambda a: ".logit_scale" not in a and ".transformer.text_projection.weight" not in a, m))
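Because tokenizer_data defaults to an empty dict, existing callers of CLIP are unaffected. A small sketch of both call styles (CLIP as changed above; clip_target and clip_sd as in the surrounding code):

    # Both constructions remain valid after this change:
    clip_a = CLIP(clip_target, embedding_directory="embeddings")   # old style
    clip_b = CLIP(clip_target, embedding_directory="embeddings",
                  tokenizer_data=clip_sd)                          # new style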
comfy/sd1_clip.py

@@ -386,7 +386,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
     return embed_out
 
 class SDTokenizer:
-    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None, pad_token=None):
+    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, pad_to_max_length=True, min_length=None, pad_token=None, tokenizer_data={}):
         if tokenizer_path is None:
             tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
         self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path)
@@ -521,10 +521,10 @@ class SDTokenizer:
 
 class SD1Tokenizer:
-    def __init__(self, embedding_directory=None, clip_name="l", tokenizer=SDTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", tokenizer=SDTokenizer):
         self.clip_name = clip_name
         self.clip = "clip_{}".format(self.clip_name)
-        setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory))
+        setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data))
 
     def tokenize_with_weights(self, text:str, return_word_ids=False):
         out = {}
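The remaining files all follow the same convention established here: accept tokenizer_data in __init__ and forward it to the parent or wrapped tokenizer. A hypothetical wrapper written against this convention (class name and clip_name are illustrative; the pattern mirrors SD2Tokenizer in the next file):

    import comfy.sd1_clip as sd1_clip

    # Hypothetical wrapper: accept tokenizer_data and pass it through, so
    # checkpoint-embedded tokenizer data reaches the underlying SDTokenizer.
    class MyWrapperTokenizer(sd1_clip.SD1Tokenizer):
        def __init__(self, embedding_directory=None, tokenizer_data={}):
            super().__init__(embedding_directory=embedding_directory,
                             tokenizer_data=tokenizer_data,
                             clip_name="l", tokenizer=sd1_clip.SDTokenizer)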
comfy/sd2_clip.py

@@ -11,12 +11,12 @@ class SD2ClipHModel(sd1_clip.SDClipModel):
         super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0})
 
 class SD2ClipHTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024)
 
 class SD2Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="h", tokenizer=SD2ClipHTokenizer)
 
 class SD2ClipModel(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/sdxl_clip.py

@@ -16,12 +16,12 @@ class SDXLClipG(sd1_clip.SDClipModel):
         return super().load_sd(sd)
 
 class SDXLClipGTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g')
 
 class SDXLTokenizer:
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory)
         self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory)
@@ -68,12 +68,12 @@ class SDXLRefinerClipModel(sd1_clip.SD1ClipModel):
 
 class StableCascadeClipGTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path=None, embedding_directory=None):
+    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
         super().__init__(tokenizer_path, pad_with_end=True, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g')
 
 class StableCascadeTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="g", tokenizer=StableCascadeClipGTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="g", tokenizer=StableCascadeClipGTokenizer)
 
 class StableCascadeClipG(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", max_length=77, freeze=True, layer="hidden", layer_idx=-1, dtype=None):
comfy/text_encoders/aura_t5.py

@@ -9,13 +9,13 @@ class PT5XlModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 2, "pad": 1}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)
 
 class PT5XlTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_pile_tokenizer"), "tokenizer.model")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='pile_t5xl', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=256, pad_token=1)
 
 class AuraT5Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="pile_t5xl", tokenizer=PT5XlTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="pile_t5xl", tokenizer=PT5XlTokenizer)
 
 class AuraT5Model(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/text_encoders/sa_t5.py

@@ -9,13 +9,13 @@ class T5BaseModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)
 
 class T5BaseTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=768, embedding_key='t5base', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=128)
 
 class SAT5Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="t5base", tokenizer=T5BaseTokenizer)
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5base", tokenizer=T5BaseTokenizer)
 
 class SAT5Model(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, **kwargs):
comfy/text_encoders/sd3_clip.py

@@ -13,22 +13,13 @@ class T5XXLModel(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5)
 
 class T5XXLTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=77)
 
-class SDT5XXLTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None):
-        super().__init__(embedding_directory=embedding_directory, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
-
-class SDT5XXLModel(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, **kwargs):
-        super().__init__(device=device, dtype=dtype, clip_name="t5xxl", clip_model=T5XXLModel, **kwargs)
-
 class SD3Tokenizer:
-    def __init__(self, embedding_directory=None):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory)
         self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory)
         self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
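Two things to note in this file: the apparently unused wrapper classes SDT5XXLTokenizer and SDT5XXLModel are deleted outright, and SD3Tokenizer (like SDXLTokenizer above) gains tokenizer_data for signature compatibility only; in this commit its three sub-tokenizers are still constructed without it:

    # Accepted for interface compatibility, not yet consumed here:
    tok = SD3Tokenizer(embedding_directory=None, tokenizer_data={})
    # tok.clip_l, tok.clip_g and tok.t5xxl are built without tokenizer_data.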
comfy/text_encoders/spiece_tokenizer.py

 import os
+import torch
 
 class SPieceTokenizer:
     add_eos = True
@@ -9,6 +10,9 @@ class SPieceTokenizer:
     def __init__(self, tokenizer_path):
         import sentencepiece
+        if torch.is_tensor(tokenizer_path):
+            tokenizer_path = tokenizer_path.numpy().tobytes()
+
         if isinstance(tokenizer_path, bytes):
             self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_eos=self.add_eos)
         else:
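This new branch is what lets tokenizer data live inside a checkpoint: SPieceTokenizer now also accepts a torch tensor holding a serialized SentencePiece model, the form such data takes in a state dict. A rough round-trip sketch, assuming the tensor stores the raw model bytes as uint8 (the file name is illustrative):

    import torch
    from comfy.text_encoders.spiece_tokenizer import SPieceTokenizer

    # Read any sentencepiece model file and wrap its bytes in a uint8 tensor,
    # mimicking how such data would be stored in a checkpoint state dict.
    with open("tokenizer.model", "rb") as f:
        proto = torch.frombuffer(bytearray(f.read()), dtype=torch.uint8)

    # The tensor is converted back via .numpy().tobytes() and handed to
    # sentencepiece.SentencePieceProcessor(model_proto=...).
    tok = SPieceTokenizer(proto)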