chenpangpang / ComfyUI · Commit 76d53c46

Add support for clip g vision model to CLIPVisionLoader.

Authored Aug 18, 2023 by comfyanonymous
Parent: fc99fa56

Showing 2 changed files with 28 additions and 3 deletions:

  comfy/clip_vision.py              +10  -3
  comfy/clip_vision_config_g.json   +18  -0
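In practice, the change lets the existing loader entry point accept a "clip g" (OpenCLIP ViT-bigG/14) vision checkpoint alongside the H and L models it already handled; the key checks that make this work are in the diff below. A minimal usage sketch follows; the checkpoint file name is hypothetical and not something shipped with this commit:

    # Sketch only: comfy.clip_vision.load() picks the g / h / vitl config
    # based on which encoder-layer keys it finds in the checkpoint.
    # "clip_g_vision.safetensors" is a placeholder file name.
    from comfy import clip_vision

    clip = clip_vision.load("models/clip_vision/clip_g_vision.safetensors")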
comfy/clip_vision.py

@@ -50,18 +50,22 @@ def convert_to_transformers(sd, prefix):
         if "{}proj".format(prefix) in sd_k:
             sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1)
 
-        sd = transformers_convert(sd, prefix, "vision_model.", 32)
+        sd = transformers_convert(sd, prefix, "vision_model.", 48)
     return sd
 
 def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
     if convert_keys:
         sd = convert_to_transformers(sd, prefix)
-    if "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
+    if "vision_model.encoder.layers.47.layer_norm1.weight" in sd:
+        json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json")
+    elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
     else:
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
     clip = ClipVisionModel(json_config)
     m, u = clip.load_sd(sd)
+    if len(m) > 0:
+        print("missing clip vision:", m)
     u = set(u)
     keys = list(sd.keys())
     for k in keys:

@@ -72,4 +76,7 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
 
 def load(ckpt_path):
     sd = load_torch_file(ckpt_path)
-    return load_clipvision_from_sd(sd)
+    if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd:
+        return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True)
+    else:
+        return load_clipvision_from_sd(sd)
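For reference, the selection above keys off the deepest encoder layer present in the state dict: the "clip g" tower (OpenCLIP ViT-bigG/14) has 48 hidden layers, so layers.47 exists only there; ViT-H/14 has 32 layers, so layers.30 exists in both H and bigG checkpoints, which is why the 48-layer check has to come first; ViT-L/14 has 24 layers and falls through to the default. A standalone sketch of the same idea, with an illustrative helper name that is not part of the commit:

    # Illustrative sketch (not the commit's code): how the key checks in
    # load_clipvision_from_sd map onto the known CLIP vision towers.
    def pick_vision_config(sd):
        # ViT-bigG/14 ("clip g") has 48 encoder layers, so layers.47 exists only there.
        if "vision_model.encoder.layers.47.layer_norm1.weight" in sd:
            return "clip_vision_config_g.json"     # hidden_size 1664, 48 layers
        # ViT-H/14 has 32 layers; layers.30 also exists in bigG, hence the check order.
        elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
            return "clip_vision_config_h.json"     # hidden_size 1280, 32 layers
        # ViT-L/14 has 24 layers, so neither key above is present.
        else:
            return "clip_vision_config_vitl.json"  # hidden_size 1024, 24 layers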
comfy/clip_vision_config_g.json
new file (0 → 100644)

{
  "attention_dropout": 0.0,
  "dropout": 0.0,
  "hidden_act": "gelu",
  "hidden_size": 1664,
  "image_size": 224,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "layer_norm_eps": 1e-05,
  "model_type": "clip_vision_model",
  "num_attention_heads": 16,
  "num_channels": 3,
  "num_hidden_layers": 48,
  "patch_size": 14,
  "projection_dim": 1280,
  "torch_dtype": "float32"
}
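This JSON follows the standard Hugging Face CLIPVisionConfig schema, and its dimensions (hidden size 1664, 48 layers, 16 heads, patch size 14) correspond to the vision tower of OpenCLIP ViT-bigG/14. A small sketch of inspecting it with the transformers library; the relative path is an assumption about running from the repo root:

    # Sketch only: load the new config with Hugging Face transformers and
    # confirm the headline dimensions. Path assumes the repo root as cwd.
    from transformers import CLIPVisionConfig

    cfg = CLIPVisionConfig.from_json_file("comfy/clip_vision_config_g.json")
    print(cfg.hidden_size, cfg.num_hidden_layers, cfg.num_attention_heads)
    # expected: 1664 48 16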