OpenDAS / diffusers · Commits

Commit 31336dae (unverified)
Fix resume epoch for all training scripts except textual_inversion (#2079)
Authored Jan 24, 2023 by Pedro Cuenca; committed by GitHub on Jan 24, 2023.
Parent: 0e98e839
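Background on the fix: the checkpoint directory name "checkpoint-<N>" encodes global_step, a count of optimizer updates, while the inner training loop advances once per dataloader step, and there are args.gradient_accumulation_steps dataloader steps per update. The old code divided the dataloader-step count (resume_global_step) by num_update_steps_per_epoch (an update count), overstating the resume epoch by a factor of gradient_accumulation_steps whenever accumulation was enabled. A minimal sketch of the corrected arithmetic, with hypothetical numbers not taken from the commit:

    # Hypothetical values; only the relationships mirror the training scripts.
    gradient_accumulation_steps = 4   # dataloader steps per optimizer update
    num_update_steps_per_epoch = 30   # optimizer updates per epoch
    global_step = 100                 # optimizer updates encoded in "checkpoint-100"

    # Dataloader steps consumed so far.
    resume_global_step = global_step * gradient_accumulation_steps  # 400

    # Before this commit: dataloader steps divided by updates-per-epoch, 4x too big.
    first_epoch_buggy = resume_global_step // num_update_steps_per_epoch  # 13

    # After this commit: updates divided by updates-per-epoch.
    first_epoch = global_step // num_update_steps_per_epoch  # 3

    # resume_step indexes dataloader steps within the resumed epoch, so the
    # modulus must also be expressed in dataloader steps.
    resume_step = resume_global_step % (
        num_update_steps_per_epoch * gradient_accumulation_steps
    )  # 400 % 120 == 40

    # Sanity check: whole epochs plus the partial epoch add back up.
    assert first_epoch * num_update_steps_per_epoch * gradient_accumulation_steps + resume_step == resume_global_step

With gradient_accumulation_steps == 1 the old and new formulas coincide, which is why the bug only surfaced in runs that used gradient accumulation.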
Showing 8 changed files with 119 additions and 61 deletions.
  examples/dreambooth/train_dreambooth.py                                                 (+15, -8)
  examples/dreambooth/train_dreambooth_lora.py                                            (+15, -8)
  examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py               (+15, -8)
  examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py   (+15, -8)
  examples/text_to_image/train_text_to_image.py                                           (+14, -6)
  examples/text_to_image/train_text_to_image_lora.py                                      (+15, -8)
  examples/unconditional_image_generation/train_unconditional.py                          (+15, -8)
  examples/unconditional_image_generation/train_unconditional_ort.py                      (+15, -7)
examples/dreambooth/train_dreambooth.py

@@ -757,14 +757,21 @@ def main(args):
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
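Besides the epoch arithmetic, each script gains the "if len(dirs) > 0 else None" guard: when --resume_from_checkpoint latest finds no checkpoint-* directory, the run now warns and falls back to a fresh start instead of raising IndexError on dirs[-1]. A standalone sketch of that discovery step (the function wrapper and its name are mine, for illustration):

    import os

    def find_latest_checkpoint(output_dir: str):
        """Return the newest 'checkpoint-<step>' directory name, or None."""
        dirs = [d for d in os.listdir(output_dir) if d.startswith("checkpoint")]
        # Sort numerically on the step suffix: a plain lexicographic sort
        # would rank "checkpoint-9" after "checkpoint-10".
        dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
        return dirs[-1] if len(dirs) > 0 else None

The same split-on-"-" parse then recovers global_step from the chosen name, e.g. int("checkpoint-100".split("-")[1]) == 100.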
examples/dreambooth/train_dreambooth_lora.py

@@ -814,14 +814,21 @@ def main(args):
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py

@@ -660,14 +660,21 @@ def main():
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py

@@ -748,14 +748,21 @@ def main(args):
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
examples/text_to_image/train_text_to_image.py

@@ -599,13 +599,21 @@ def main():
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
             first_epoch = global_step // num_update_steps_per_epoch
-            resume_step = global_step % num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
examples/text_to_image/train_text_to_image_lora.py

@@ -651,14 +651,21 @@ def main():
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
examples/unconditional_image_generation/train_unconditional.py

@@ -439,14 +439,21 @@ def main(args):
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     # Train!
     for epoch in range(first_epoch, args.num_epochs):
examples/unconditional_image_generation/train_unconditional_ort.py

@@ -396,13 +396,21 @@ def main(args):
             dirs = os.listdir(args.output_dir)
             dirs = [d for d in dirs if d.startswith("checkpoint")]
             dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+        else:
             accelerator.print(f"Resuming from checkpoint {path}")
             accelerator.load_state(os.path.join(args.output_dir, path))
             global_step = int(path.split("-")[1])

             resume_global_step = global_step * args.gradient_accumulation_steps
-            first_epoch = resume_global_step // num_update_steps_per_epoch
-            resume_step = resume_global_step % num_update_steps_per_epoch
+            first_epoch = global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % (num_update_steps_per_epoch * args.gradient_accumulation_steps)

     for epoch in range(first_epoch, args.num_epochs):
         model.train()
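For reference, the "for epoch in range(first_epoch, args.num_epochs):" context above shows where the recovered values land. The snippet below is a self-contained simulation of the skip-ahead pattern these training loops share, continuing the hypothetical numbers from the arithmetic sketch near the top; it is a paraphrase for illustration, not code from this diff:

    # Continues the hypothetical numbers from the arithmetic sketch above.
    gradient_accumulation_steps = 4
    num_update_steps_per_epoch = 30
    steps_per_epoch = num_update_steps_per_epoch * gradient_accumulation_steps  # 120 dataloader steps
    num_epochs = 5                    # stands in for args.num_epochs
    first_epoch, resume_step = 3, 40  # as recovered from "checkpoint-100"

    trained = 0  # dataloader steps executed after resuming
    for epoch in range(first_epoch, num_epochs):
        for step in range(steps_per_epoch):  # stands in for enumerate(train_dataloader)
            # Skip dataloader steps the checkpointed run already covered;
            # this is why resume_step must be counted in dataloader steps.
            if epoch == first_epoch and step < resume_step:
                continue
            trained += 1

    # Two remaining epochs minus the 40 already-consumed steps of epoch 3.
    assert trained == 2 * steps_per_epoch - resume_step  # 240 - 40 == 200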