chenpangpang / ComfyUI

Commit 2c4e0b49, authored Jul 02, 2023 by comfyanonymous
Parent: 6f3d9f52

Switch to fp16 on some cards when the model is too big.

Showing 2 changed files with 30 additions and 4 deletions:

comfy/model_management.py    +22  -3
comfy/sd.py                   +8  -1
comfy/model_management.py
@@ -245,6 +245,8 @@ def unload_model():
             n.cpu()
         current_gpu_controlnets = []

+def minimum_inference_memory():
+    return (768 * 1024 * 1024)

 def load_model_gpu(model):
     global current_loaded_model
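The new minimum_inference_memory() helper centralises the VRAM headroom kept free for inference activations: 768 * 1024 * 1024 bytes, i.e. 768 MiB. A minimal sketch of how that reservation turns into a weight budget, assuming a hypothetical 6 GiB of free VRAM (the figure is illustrative, not something measured by the commit):

# Illustrative arithmetic only; the free-memory figure below is made up.
def minimum_inference_memory():
    return 768 * 1024 * 1024                 # 805,306,368 bytes reserved for inference

hypothetical_free_vram = 6 * 1024 ** 3       # pretend 6 GiB of VRAM is currently free
weight_budget = hypothetical_free_vram - minimum_inference_memory()
print(weight_budget)                         # 5637144576 bytes, roughly 5.25 GiB left for weights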
@@ -272,7 +274,7 @@ def load_model_gpu(model):
         model_size = model.model_size()
         current_free_mem = get_free_memory(torch_dev)
         lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
-        if model_size > (current_free_mem - (512 * 1024 * 1024)): #only switch to lowvram if really necessary
+        if model_size > (current_free_mem - minimum_inference_memory()): #only switch to lowvram if really necessary
             vram_set_state = VRAMState.LOW_VRAM

     current_loaded_model = model
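With minimum_inference_memory() in place, the low-VRAM fallback above now subtracts 768 MiB from free VRAM instead of the previous hard-coded 512 MiB, so lowvram mode kicks in slightly earlier. A small sketch of the old versus new threshold, using made-up sizes:

# Compares the old and new lowvram thresholds; all numbers are hypothetical.
MiB = 1024 * 1024
model_size = 5500 * MiB         # pretend the weights need ~5.37 GiB
current_free_mem = 6144 * MiB   # pretend 6 GiB of VRAM is free

old_lowvram = model_size > current_free_mem - 512 * MiB   # previous hard-coded headroom
new_lowvram = model_size > current_free_mem - 768 * MiB   # now minimum_inference_memory()
print(old_lowvram, new_lowvram)  # False True: the larger reservation trips lowvram mode earlier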
@@ -458,7 +460,7 @@ def is_device_cpu(device):
             return True
     return False

-def should_use_fp16(device=None):
+def should_use_fp16(device=None, model_params=0):
     global xpu_available
     global directml_enabled
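The added model_params argument defaults to 0, so existing callers that do not pass a parameter count keep the old behaviour: a size of zero can never exceed a positive memory budget, and the decision falls through to the architecture checks below. A one-line sketch of that property (the budget is a made-up value):

# model_params defaults to 0, and 0 parameters * 4 bytes never exceeds a positive budget,
# so the new size-based branch stays inactive when the argument is omitted.
hypothetical_budget = 5 * 1024 ** 3   # made-up free-memory budget in bytes
print(0 * 4 > hypothetical_budget)    # False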
@@ -482,10 +484,27 @@ def should_use_fp16(device=None):
         return True

     props = torch.cuda.get_device_properties("cuda")
     if props.major < 6:
         return False

+    fp16_works = False
+    #FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
+    #when the model doesn't actually fit on the card
+    #TODO: actually test if GP106 and others have the same type of behavior
+    nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
+    for x in nvidia_10_series:
+        if x in props.name.lower():
+            fp16_works = True
+
+    if fp16_works:
+        free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory())
+        if model_params * 4 > free_model_memory:
+            return True
+
     if props.major < 7:
         return False

-    #FP32 is faster on those cards?
+    #FP16 is just broken on these cards
     nvidia_16_series = ["1660", "1650", "1630", "T500", "T550", "T600"]
     for x in nvidia_16_series:
         if x in props.name:
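For the listed 10-series (Pascal) cards, fp16 is known to work but is a bit slower than fp32, so the commit only switches to fp16 when the fp32 weights would not fit in 90% of free VRAM minus the 768 MiB inference reservation. A hedged sketch of that rule; the 7 GiB free figure and the two parameter counts are illustrative assumptions, not values taken from the commit:

# Sketch of the Pascal rule with made-up numbers; the real code queries torch for free VRAM.
GiB = 1024 ** 3

def pascal_should_use_fp16(model_params, free_vram_bytes):
    # Budget: 90% of free VRAM minus the 768 MiB inference reservation.
    free_model_memory = free_vram_bytes * 0.9 - 768 * 1024 * 1024
    # fp32 weights take 4 bytes per parameter; fall back to fp16 only if they do not fit.
    return model_params * 4 > free_model_memory

free_vram = 7 * GiB          # hypothetical free VRAM on an 8 GiB GTX 1080
print(pascal_should_use_fp16(860_000_000, free_vram))    # False: an ~860M-param model fits in fp32
print(pascal_should_use_fp16(2_600_000_000, free_vram))  # True: a ~2.6B-param model forces fp16

For the 16-series list in the hunk above, only the explanatory comment is reworded in this commit; the check itself is unchanged.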
comfy/sd.py
@@ -1122,6 +1122,12 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
     return (ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)

+def calculate_parameters(sd, prefix):
+    params = 0
+    for k in sd.keys():
+        if k.startswith(prefix):
+            params += sd[k].nelement()
+    return params

 def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None):
     sd = utils.load_torch_file(ckpt_path)
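The new calculate_parameters helper counts parameters straight from the state dict by summing torch.Tensor.nelement() over every key under a prefix, so the model's size is known before anything is moved to the GPU. A self-contained sketch on a toy state dict (the tensor shapes are made up; only the prefix matches the real call):

import torch

def calculate_parameters(sd, prefix):
    # Same counting logic as the new helper: sum element counts of every
    # tensor whose key starts with the given prefix.
    params = 0
    for k in sd.keys():
        if k.startswith(prefix):
            params += sd[k].nelement()
    return params

# Toy state dict standing in for a loaded checkpoint (shapes are invented).
toy_sd = {
    "model.diffusion_model.input_blocks.0.weight": torch.zeros(320, 4, 3, 3),
    "model.diffusion_model.input_blocks.0.bias": torch.zeros(320),
    "cond_stage_model.transformer.text_model.embeddings.weight": torch.zeros(49408, 768),
}
print(calculate_parameters(toy_sd, "model.diffusion_model."))  # 11840 (= 320*4*3*3 + 320)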
@@ -1132,7 +1138,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     model = None
     clip_target = None

-    fp16 = model_management.should_use_fp16()
+    parameters = calculate_parameters(sd, "model.diffusion_model.")
+    fp16 = model_management.should_use_fp16(model_params=parameters)

     class WeightsLoader(torch.nn.Module):
         pass
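Together, the two added lines let load_checkpoint_guess_config measure the diffusion model before choosing a precision, so the fp16 switch reflects the actual checkpoint size rather than the GPU alone. A wiring-only sketch with fake stand-ins (the SimpleNamespace objects and the 1-billion-parameter threshold are invented for illustration; the real decision lives in model_management.should_use_fp16):

from types import SimpleNamespace

# Wiring sketch only: fake stand-ins so the two new lines can run in isolation.
fake_sd = {
    "model.diffusion_model.w": SimpleNamespace(nelement=lambda: 2_600_000_000),
    "first_stage_model.w": SimpleNamespace(nelement=lambda: 80_000_000),
}

def calculate_parameters(sd, prefix):
    return sum(v.nelement() for k, v in sd.items() if k.startswith(prefix))

# Toy replacement for model_management: picks fp16 above an arbitrary parameter count.
fake_model_management = SimpleNamespace(
    should_use_fp16=lambda model_params=0: model_params > 1_000_000_000)

parameters = calculate_parameters(fake_sd, "model.diffusion_model.")
fp16 = fake_model_management.should_use_fp16(model_params=parameters)
print(parameters, fp16)  # 2600000000 True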