Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
1405b438
Unverified
Commit
1405b438
authored
Jan 03, 2023
by
BlueRum
Committed by
GitHub
Jan 03, 2023
Browse files
[example] fix save_load bug for dreambooth (#2280)
parent
f027ef79
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
53 additions
and
41 deletions
+53
-41
examples/images/dreambooth/colossalai.sh
examples/images/dreambooth/colossalai.sh
+11
-9
examples/images/dreambooth/dreambooth.sh
examples/images/dreambooth/dreambooth.sh
+12
-0
examples/images/dreambooth/inference.py
examples/images/dreambooth/inference.py
+12
-0
examples/images/dreambooth/train.sh
examples/images/dreambooth/train.sh
+0
-19
examples/images/dreambooth/train_dreambooth_colossalai.py
examples/images/dreambooth/train_dreambooth_colossalai.py
+18
-13
No files found.
examples/images/dreambooth/colossalai.sh
View file @
1405b438
export
MODEL_NAME
=
"CompVis/stable-diffusion-v1-4"
export
INSTANCE_DIR
=
"input"
export
OUTPUT_DIR
=
"output"
INSTANCE_PROMPT
=
"a photo of sks dog"
export
MODEL_NAME
=
<Your Pretrained Model Path>
export
INSTANCE_DIR
=
<Your Input Pics Path>
export
CLASS_DIR
=
"path-to-class-images"
export
OUTPUT_DIR
=
"path-to-save-model"
HF_DATASETS_OFFLINE
=
1
TRANSFORMERS_OFFLINE
=
1
DIFFUSERS_OFFLINE
=
1
torchrun
--nproc_per_node
2
--master_port
=
25641 train_dreambooth_colossalai.py
\
--pretrained_model_name_or_path
=
$MODEL_NAME
\
--instance_data_dir
=
$INSTANCE_DIR
\
--output_dir
=
$OUTPUT_DIR
\
--instance_prompt
=
"a photo of a dog"
\
--resolution
=
512
\
--train_batch_size
=
1
\
--gradient_accumulation_steps
=
1
\
--learning_rate
=
5e-6
\
--instance_prompt
=
INSTANCE_PROMPT
\
--lr_scheduler
=
"constant"
\
--lr_warmup_steps
=
0
\
--
max_train_step
s
=
4
00
\
--placement
=
"c
pu"
--
num_class_image
s
=
2
00
\
--placement
=
"c
uda"
\
examples/images/dreambooth/dreambooth.sh
0 → 100644
View file @
1405b438
python train_dreambooth.py
\
--pretrained_model_name_or_path
=
## Your Model Path \
--instance_data_dir
=
## Your Training Input Pics Path \
--output_dir
=
"path-to-save-model"
\
--instance_prompt
=
"a photo of a dog"
\
--resolution
=
512
\
--train_batch_size
=
1
\
--gradient_accumulation_steps
=
1
\
--learning_rate
=
5e-6
\
--lr_scheduler
=
"constant"
\
--lr_warmup_steps
=
0
\
--num_class_images
=
200
\
examples/images/dreambooth/inference.py
0 → 100644
View file @
1405b438
from
diffusers
import
StableDiffusionPipeline
,
DiffusionPipeline
import
torch
model_id
=
<
Your
Model
Path
>
print
(
f
"Loading model... from
{
model_id
}
"
)
pipe
=
DiffusionPipeline
.
from_pretrained
(
model_id
,
torch_dtype
=
torch
.
float16
).
to
(
"cuda"
)
prompt
=
"A photo of an apple."
image
=
pipe
(
prompt
,
num_inference_steps
=
50
,
guidance_scale
=
7.5
).
images
[
0
]
image
.
save
(
"output.png"
)
examples/images/dreambooth/train.sh
deleted
100755 → 0
View file @
f027ef79
export
MODEL_NAME
=
"CompVis/stable-diffusion-v1-4"
export
INSTANCE_DIR
=
"input"
export
OUTPUT_DIR
=
"output"
HF_DATASETS_OFFLINE
=
1
TRANSFORMERS_OFFLINE
=
1
DIFFUSERS_OFFLINE
=
1
accelerate launch train_dreambooth.py
\
--pretrained_model_name_or_path
=
$MODEL_NAME
\
--instance_data_dir
=
$INSTANCE_DIR
\
--output_dir
=
$OUTPUT_DIR
\
--instance_prompt
=
"a photo of sks dog"
\
--resolution
=
512
\
--train_batch_size
=
1
\
--gradient_accumulation_steps
=
1
\
--learning_rate
=
5e-6
\
--lr_scheduler
=
"constant"
\
--lr_warmup_steps
=
0
\
--max_train_steps
=
400
examples/images/dreambooth/train_dreambooth_colossalai.py
View file @
1405b438
...
...
@@ -11,6 +11,7 @@ import torch
import
torch.distributed
as
dist
import
torch.nn.functional
as
F
import
torch.utils.checkpoint
from
copy
import
deepcopy
from
diffusers
import
AutoencoderKL
,
DDPMScheduler
,
DiffusionPipeline
,
UNet2DConditionModel
from
diffusers.optimization
import
get_scheduler
from
huggingface_hub
import
HfFolder
,
Repository
,
whoami
...
...
@@ -359,6 +360,7 @@ def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy:
placement_policy
=
placememt_policy
,
pin_memory
=
True
,
search_range_mb
=
32
)
elif
version
.
parse
(
cai_version
)
<=
version
.
parse
(
"0.1.10"
)
and
version
.
parse
(
cai_version
)
>=
version
.
parse
(
"0.1.9"
):
from
colossalai.gemini
import
ChunkManager
,
GeminiManager
chunk_size
=
ChunkManager
.
search_chunk_size
(
model
,
64
*
1024
**
2
,
32
)
...
...
@@ -381,6 +383,7 @@ def main(args):
"gradient_accumulation_steps"
:
args
.
gradient_accumulation_steps
,
"clip_grad_norm"
:
args
.
max_grad_norm
,
}
colossalai
.
launch_from_torch
(
config
=
config
)
pg
=
ProcessGroup
()
...
...
@@ -465,22 +468,22 @@ def main(args):
text_encoder
=
text_encoder_cls
.
from_pretrained
(
args
.
pretrained_model_name_or_path
,
subfolder
=
"text_encoder"
,
revision
=
args
.
revision
,
low_cpu_mem_usage
=
False
)
revision
=
args
.
revision
,)
logger
.
info
(
f
"Loading AutoencoderKL from
{
args
.
pretrained_model_name_or_path
}
"
,
ranks
=
[
0
])
vae
=
AutoencoderKL
.
from_pretrained
(
args
.
pretrained_model_name_or_path
,
subfolder
=
"vae"
,
revision
=
args
.
revision
,
low_cpu_mem_usage
=
False
)
revision
=
args
.
revision
,
)
with
ColoInitContext
(
device
=
'cpu'
):
logger
.
info
(
f
"Loading UNet2DConditionModel from
{
args
.
pretrained_model_name_or_path
}
"
,
ranks
=
[
0
])
with
ColoInitContext
():
unet
=
UNet2DConditionModel
.
from_pretrained
(
args
.
pretrained_model_name_or_path
,
subfolder
=
"unet"
,
revision
=
args
.
revision
,
low_cpu_mem_usage
=
False
)
vae
.
requires_grad_
(
False
)
text_encoder
.
requires_grad_
(
False
)
...
...
@@ -597,7 +600,7 @@ def main(args):
for
epoch
in
range
(
args
.
num_train_epochs
):
unet
.
train
()
for
step
,
batch
in
enumerate
(
train_dataloader
):
torch
.
cuda
.
reset_peak_memory_stats
()
# Move batch to gpu
for
key
,
value
in
batch
.
items
():
batch
[
key
]
=
value
.
to
(
get_current_device
(),
non_blocking
=
True
)
...
...
@@ -653,7 +656,7 @@ def main(args):
optimizer
.
step
()
lr_scheduler
.
step
()
logger
.
info
(
f
"max GPU_mem cost is
{
torch
.
cuda
.
max_memory_allocated
()
/
2
**
20
}
MB"
,
ranks
=
[
0
])
# Checks if the accelerator has performed an optimization step behind the scenes
progress_bar
.
update
(
1
)
global_step
+=
1
...
...
@@ -678,13 +681,15 @@ def main(args):
break
torch
.
cuda
.
synchronize
()
unet
=
convert_to_torch_module
(
unet
)
if
gpc
.
get_local_rank
(
ParallelMode
.
DATA
)
==
0
:
pipeline
=
DiffusionPipeline
.
from_pretrained
(
args
.
pretrained_model_name_or_path
,
unet
=
convert_to_torch_module
(
unet
)
,
unet
=
unet
,
revision
=
args
.
revision
,
)
pipeline
.
save_pretrained
(
args
.
output_dir
)
logger
.
info
(
f
"Saving model checkpoint to
{
args
.
output_dir
}
"
,
ranks
=
[
0
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment