Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
renzhc
diffusers_dcu
Commits
7d8bf1a9
Commit
7d8bf1a9
authored
Jun 14, 2022
by
Patrick von Platen
Browse files
make pndm easier
parent
ca72c1f8
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
186 additions
and
66 deletions
+186
-66
_
_
+156
-0
src/diffusers/pipelines/pipeline_pndm.py
src/diffusers/pipelines/pipeline_pndm.py
+30
-66
No files found.
_
0 → 100644
View file @
7d8bf1a9
#
Copyright
2022
The
HuggingFace
Team
.
All
rights
reserved
.
#
#
Licensed
under
the
Apache
License
,
Version
2.0
(
the
"License"
);
#
you
may
not
use
this
file
except
in
compliance
with
the
License
.
#
You
may
obtain
a
copy
of
the
License
at
#
#
http
://
www
.
apache
.
org
/
licenses
/
LICENSE
-
2.0
#
#
Unless
required
by
applicable
law
or
agreed
to
in
writing
,
software
#
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS
,
#
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND
,
either
express
or
implied
.
#
See
the
License
for
the
specific
language
governing
permissions
and
#
limitations
under
the
License
.
import
torch
import
tqdm
from
..
pipeline_utils
import
DiffusionPipeline
class
PNDM
(
DiffusionPipeline
):
def
__init__
(
self
,
unet
,
noise_scheduler
):
super
().
__init__
()
noise_scheduler
=
noise_scheduler
.
set_format
(
"pt"
)
self
.
register_modules
(
unet
=
unet
,
noise_scheduler
=
noise_scheduler
)
def
__call__
(
self
,
batch_size
=
1
,
generator
=
None
,
torch_device
=
None
,
num_inference_steps
=
50
):
#
eta
corresponds
to
η
in
paper
and
should
be
between
[
0
,
1
]
if
torch_device
is
None
:
torch_device
=
"cuda"
if
torch
.
cuda
.
is_available
()
else
"cpu"
num_trained_timesteps
=
self
.
noise_scheduler
.
timesteps
inference_step_times
=
range
(
0
,
num_trained_timesteps
,
num_trained_timesteps
//
num_inference_steps
)
self
.
unet
.
to
(
torch_device
)
#
Sample
gaussian
noise
to
begin
loop
image
=
torch
.
randn
(
(
batch_size
,
self
.
unet
.
in_channels
,
self
.
unet
.
resolution
,
self
.
unet
.
resolution
),
generator
=
generator
,
)
image
=
image
.
to
(
torch_device
)
seq
=
list
(
inference_step_times
)
seq_next
=
[-
1
]
+
list
(
seq
[:-
1
])
model
=
self
.
unet
warmup_steps
=
[
len
(
seq
)
-
(
i
//
4
+
1
)
for
i
in
range
(
3
*
4
)]
ets
=
[]
prev_image
=
image
for
i
,
step_idx
in
enumerate
(
warmup_steps
):
i
=
seq
[
step_idx
]
j
=
seq_next
[
step_idx
]
t
=
(
torch
.
ones
(
image
.
shape
[
0
])
*
i
)
t_next
=
(
torch
.
ones
(
image
.
shape
[
0
])
*
j
)
residual
=
model
(
image
.
to
(
"cuda"
),
t
.
to
(
"cuda"
))
residual
=
residual
.
to
(
"cpu"
)
image
=
image
.
to
(
"cpu"
)
image
=
self
.
noise_scheduler
.
transfer
(
prev_image
.
to
(
"cpu"
),
t_list
[
0
],
t_list
[
1
],
residual
)
if
i
%
4
==
0
:
ets
.
append
(
residual
)
prev_image
=
image
for
ets
=
[]
step_idx
=
len
(
seq
)
-
1
while
step_idx
>=
0
:
i
=
seq
[
step_idx
]
j
=
seq_next
[
step_idx
]
t
=
(
torch
.
ones
(
image
.
shape
[
0
])
*
i
)
t_next
=
(
torch
.
ones
(
image
.
shape
[
0
])
*
j
)
residual
=
model
(
image
.
to
(
"cuda"
),
t
.
to
(
"cuda"
))
residual
=
residual
.
to
(
"cpu"
)
t_list
=
[
t
,
(
t
+
t_next
)/
2
,
t_next
]
ets
.
append
(
residual
)
if
len
(
ets
)
<=
3
:
image
=
image
.
to
(
"cpu"
)
x_2
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t_list
[
0
],
t_list
[
1
],
residual
)
e_2
=
model
(
x_2
.
to
(
"cuda"
),
t_list
[
1
].
to
(
"cuda"
)).
to
(
"cpu"
)
x_3
=
self
.
noise_scheduler
.
transfer
(
image
,
t_list
[
0
],
t_list
[
1
],
e_2
)
e_3
=
model
(
x_3
.
to
(
"cuda"
),
t_list
[
1
].
to
(
"cuda"
)).
to
(
"cpu"
)
x_4
=
self
.
noise_scheduler
.
transfer
(
image
,
t_list
[
0
],
t_list
[
2
],
e_3
)
e_4
=
model
(
x_4
.
to
(
"cuda"
),
t_list
[
2
].
to
(
"cuda"
)).
to
(
"cpu"
)
residual
=
(
1
/
6
)
*
(
residual
+
2
*
e_2
+
2
*
e_3
+
e_4
)
else
:
residual
=
(
1
/
24
)
*
(
55
*
ets
[-
1
]
-
59
*
ets
[-
2
]
+
37
*
ets
[-
3
]
-
9
*
ets
[-
4
])
img_next
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t
,
t_next
,
residual
)
image
=
img_next
step_idx
=
step_idx
-
1
#
if
len
(
prev_noises
)
in
[
1
,
2
]:
#
t
=
(
t
+
t_next
)
/
2
#
elif
len
(
prev_noises
)
==
3
:
#
t
=
t_next
/
2
#
if
len
(
prev_noises
)
==
0
:
#
ets
.
append
(
residual
)
#
#
if
len
(
ets
)
>
3
:
#
residual
=
(
1
/
24
)
*
(
55
*
ets
[-
1
]
-
59
*
ets
[-
2
]
+
37
*
ets
[-
3
]
-
9
*
ets
[-
4
])
#
step_idx
=
step_idx
-
1
#
elif
len
(
ets
)
<=
3
and
len
(
prev_noises
)
==
3
:
#
residual
=
(
1
/
6
)
*
(
prev_noises
[-
3
]
+
2
*
prev_noises
[-
2
]
+
2
*
prev_noises
[-
1
]
+
residual
)
#
prev_noises
=
[]
#
step_idx
=
step_idx
-
1
#
elif
len
(
ets
)
<=
3
and
len
(
prev_noises
)
<
3
:
#
prev_noises
.
append
(
residual
)
#
if
len
(
prev_noises
)
<
2
:
#
t_next
=
(
t
+
t_next
)
/
2
#
#
image
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t
,
t_next
,
residual
)
return
image
#
See
formulas
(
12
)
and
(
16
)
of
DDIM
paper
https
://
arxiv
.
org
/
pdf
/
2010.02502
.
pdf
#
Ideally
,
read
DDIM
paper
in
-
detail
understanding
#
Notation
(<
variable
name
>
->
<
name
in
paper
>
#
-
pred_noise_t
->
e_theta
(
x_t
,
t
)
#
-
pred_original_image
->
f_theta
(
x_t
,
t
)
or
x_0
#
-
std_dev_t
->
sigma_t
#
-
eta
->
η
#
-
pred_image_direction
->
"direction pointingc to x_t"
#
-
pred_prev_image
->
"x_t-1"
#
for
t
in
tqdm
.
tqdm
(
reversed
(
range
(
num_inference_steps
)),
total
=
num_inference_steps
):
#
1.
predict
noise
residual
#
with
torch
.
no_grad
():
#
residual
=
self
.
unet
(
image
,
inference_step_times
[
t
])
#
#
2.
predict
previous
mean
of
image
x_t
-
1
#
pred_prev_image
=
self
.
noise_scheduler
.
step
(
residual
,
image
,
t
,
num_inference_steps
,
eta
)
#
#
3.
optionally
sample
variance
#
variance
=
0
#
if
eta
>
0
:
#
noise
=
torch
.
randn
(
image
.
shape
,
generator
=
generator
).
to
(
image
.
device
)
#
variance
=
self
.
noise_scheduler
.
get_variance
(
t
,
num_inference_steps
).
sqrt
()
*
eta
*
noise
#
#
4.
set
current
image
to
prev_image
:
x_t
->
x_t
-
1
#
image
=
pred_prev_image
+
variance
src/diffusers/pipelines/pipeline_pndm.py
View file @
7d8bf1a9
...
@@ -48,9 +48,36 @@ class PNDM(DiffusionPipeline):
...
@@ -48,9 +48,36 @@ class PNDM(DiffusionPipeline):
seq_next
=
[
-
1
]
+
list
(
seq
[:
-
1
])
seq_next
=
[
-
1
]
+
list
(
seq
[:
-
1
])
model
=
self
.
unet
model
=
self
.
unet
warmup_time_steps
=
list
(
reversed
([(
t
+
5
)
//
10
*
10
for
t
in
range
(
seq
[
-
4
],
seq
[
-
1
],
5
)]))
cur_residual
=
0
prev_image
=
image
ets
=
[]
ets
=
[]
prev_noises
=
[]
for
i
in
range
(
len
(
warmup_time_steps
)):
step_idx
=
len
(
seq
)
-
1
t
=
warmup_time_steps
[
i
]
*
torch
.
ones
(
image
.
shape
[
0
])
t_next
=
(
warmup_time_steps
[
i
+
1
]
if
i
<
len
(
warmup_time_steps
)
-
1
else
warmup_time_steps
[
-
1
])
*
torch
.
ones
(
image
.
shape
[
0
])
residual
=
model
(
image
.
to
(
"cuda"
),
t
.
to
(
"cuda"
))
residual
=
residual
.
to
(
"cpu"
)
if
i
%
4
==
0
:
cur_residual
+=
1
/
6
*
residual
ets
.
append
(
residual
)
prev_image
=
image
elif
(
i
-
1
)
%
4
==
0
:
cur_residual
+=
1
/
3
*
residual
elif
(
i
-
2
)
%
4
==
0
:
cur_residual
+=
1
/
3
*
residual
elif
(
i
-
3
)
%
4
==
0
:
cur_residual
+=
1
/
6
*
residual
residual
=
cur_residual
cur_residual
=
0
image
=
image
.
to
(
"cpu"
)
t_2
=
warmup_time_steps
[
4
*
(
i
//
4
)]
*
torch
.
ones
(
image
.
shape
[
0
])
image
=
self
.
noise_scheduler
.
transfer
(
prev_image
.
to
(
"cpu"
),
t_2
,
t_next
,
residual
)
step_idx
=
len
(
seq
)
-
4
while
step_idx
>=
0
:
while
step_idx
>=
0
:
i
=
seq
[
step_idx
]
i
=
seq
[
step_idx
]
j
=
seq_next
[
step_idx
]
j
=
seq_next
[
step_idx
]
...
@@ -60,21 +87,7 @@ class PNDM(DiffusionPipeline):
...
@@ -60,21 +87,7 @@ class PNDM(DiffusionPipeline):
residual
=
model
(
image
.
to
(
"cuda"
),
t
.
to
(
"cuda"
))
residual
=
model
(
image
.
to
(
"cuda"
),
t
.
to
(
"cuda"
))
residual
=
residual
.
to
(
"cpu"
)
residual
=
residual
.
to
(
"cpu"
)
t_list
=
[
t
,
(
t
+
t_next
)
/
2
,
t_next
]
ets
.
append
(
residual
)
ets
.
append
(
residual
)
if
len
(
ets
)
<=
3
:
image
=
image
.
to
(
"cpu"
)
x_2
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t_list
[
0
],
t_list
[
1
],
residual
)
e_2
=
model
(
x_2
.
to
(
"cuda"
),
t_list
[
1
].
to
(
"cuda"
)).
to
(
"cpu"
)
x_3
=
self
.
noise_scheduler
.
transfer
(
image
,
t_list
[
0
],
t_list
[
1
],
e_2
)
e_3
=
model
(
x_3
.
to
(
"cuda"
),
t_list
[
1
].
to
(
"cuda"
)).
to
(
"cpu"
)
x_4
=
self
.
noise_scheduler
.
transfer
(
image
,
t_list
[
0
],
t_list
[
2
],
e_3
)
e_4
=
model
(
x_4
.
to
(
"cuda"
),
t_list
[
2
].
to
(
"cuda"
)).
to
(
"cpu"
)
residual
=
(
1
/
6
)
*
(
residual
+
2
*
e_2
+
2
*
e_3
+
e_4
)
else
:
residual
=
(
1
/
24
)
*
(
55
*
ets
[
-
1
]
-
59
*
ets
[
-
2
]
+
37
*
ets
[
-
3
]
-
9
*
ets
[
-
4
])
residual
=
(
1
/
24
)
*
(
55
*
ets
[
-
1
]
-
59
*
ets
[
-
2
]
+
37
*
ets
[
-
3
]
-
9
*
ets
[
-
4
])
img_next
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t
,
t_next
,
residual
)
img_next
=
self
.
noise_scheduler
.
transfer
(
image
.
to
(
"cpu"
),
t
,
t_next
,
residual
)
...
@@ -82,53 +95,4 @@ class PNDM(DiffusionPipeline):
...
@@ -82,53 +95,4 @@ class PNDM(DiffusionPipeline):
step_idx
=
step_idx
-
1
step_idx
=
step_idx
-
1
# if len(prev_noises) in [1, 2]:
# t = (t + t_next) / 2
# elif len(prev_noises) == 3:
# t = t_next / 2
# if len(prev_noises) == 0:
# ets.append(residual)
#
# if len(ets) > 3:
# residual = (1 / 24) * (55 * ets[-1] - 59 * ets[-2] + 37 * ets[-3] - 9 * ets[-4])
# step_idx = step_idx - 1
# elif len(ets) <= 3 and len(prev_noises) == 3:
# residual = (1 / 6) * (prev_noises[-3] + 2 * prev_noises[-2] + 2 * prev_noises[-1] + residual)
# prev_noises = []
# step_idx = step_idx - 1
# elif len(ets) <= 3 and len(prev_noises) < 3:
# prev_noises.append(residual)
# if len(prev_noises) < 2:
# t_next = (t + t_next) / 2
#
# image = self.noise_scheduler.transfer(image.to("cpu"), t, t_next, residual)
return
image
return
image
# See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
# Ideally, read DDIM paper in-detail understanding
# Notation (<variable name> -> <name in paper>
# - pred_noise_t -> e_theta(x_t, t)
# - pred_original_image -> f_theta(x_t, t) or x_0
# - std_dev_t -> sigma_t
# - eta -> η
# - pred_image_direction -> "direction pointingc to x_t"
# - pred_prev_image -> "x_t-1"
# for t in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps):
# 1. predict noise residual
# with torch.no_grad():
# residual = self.unet(image, inference_step_times[t])
#
# 2. predict previous mean of image x_t-1
# pred_prev_image = self.noise_scheduler.step(residual, image, t, num_inference_steps, eta)
#
# 3. optionally sample variance
# variance = 0
# if eta > 0:
# noise = torch.randn(image.shape, generator=generator).to(image.device)
# variance = self.noise_scheduler.get_variance(t, num_inference_steps).sqrt() * eta * noise
#
# 4. set current image to prev_image: x_t -> x_t-1
# image = pred_prev_image + variance
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment