Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
GLM-4V_pytorch
Commits
1bfbcff0
Commit
1bfbcff0
authored
Jun 13, 2024
by
wanglch
Browse files
Initial commit
parents
Pipeline
#1204
canceled with stages
Changes
707
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
660 additions
and
0 deletions
+660
-0
swift-main/swift/aigc/diffusers/infer_dreambooth.py
swift-main/swift/aigc/diffusers/infer_dreambooth.py
+76
-0
swift-main/swift/aigc/diffusers/infer_dreambooth_lora.py
swift-main/swift/aigc/diffusers/infer_dreambooth_lora.py
+99
-0
swift-main/swift/aigc/diffusers/infer_dreambooth_lora_sdxl.py
...t-main/swift/aigc/diffusers/infer_dreambooth_lora_sdxl.py
+97
-0
swift-main/swift/aigc/diffusers/infer_text_to_image.py
swift-main/swift/aigc/diffusers/infer_text_to_image.py
+96
-0
swift-main/swift/aigc/diffusers/infer_text_to_image_lora.py
swift-main/swift/aigc/diffusers/infer_text_to_image_lora.py
+98
-0
swift-main/swift/aigc/diffusers/infer_text_to_image_lora_sdxl.py
...ain/swift/aigc/diffusers/infer_text_to_image_lora_sdxl.py
+98
-0
swift-main/swift/aigc/diffusers/infer_text_to_image_sdxl.py
swift-main/swift/aigc/diffusers/infer_text_to_image_sdxl.py
+96
-0
No files found.
Too many changes to show.
To preserve performance only
707 of 707+
files are displayed.
Plain diff
Email patch
swift-main/swift/aigc/diffusers/infer_dreambooth.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
StableDiffusionPipeline
from
modelscope
import
snapshot_download
def parse_args():
    """Parse the command-line options for DreamBooth inference.

    Returns:
        argparse.Namespace: carries ``model_path``, ``prompt``,
        ``image_save_path``, ``torch_dtype``, ``num_inference_steps`` and
        ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a dreambooth inference.')
    parser.add_argument(
        '--model_path',
        type=str,
        default=None,
        required=True,
        help='Path to trained model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=50,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image from a trained DreamBooth model and save it.

    Reads the options from ``parse_args()``, loads a
    ``StableDiffusionPipeline`` from ``--model_path`` in the requested
    precision, moves it to CUDA, runs the prompt and writes the first
    returned image to ``--image_save_path``.
    """
    opts = parse_args()

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    precision = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
    }.get(opts.torch_dtype, torch.float32)

    pipeline = StableDiffusionPipeline.from_pretrained(opts.model_path, torch_dtype=precision)
    pipeline = pipeline.to('cuda')

    outputs = pipeline(
        opts.prompt,
        num_inference_steps=opts.num_inference_steps,
        guidance_scale=opts.guidance_scale,
    )
    first_image = outputs.images[0]
    first_image.save(opts.image_save_path)
swift-main/swift/aigc/diffusers/infer_dreambooth_lora.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
DiffusionPipeline
,
DPMSolverMultistepScheduler
from
modelscope
import
snapshot_download
from
swift
import
Swift
def parse_args():
    """Parse the command-line options for DreamBooth LoRA inference.

    ``--base_model_path`` is optional and falls back to its default; it was
    previously marked required, which made the default unreachable.

    Returns:
        argparse.Namespace: carries ``base_model_path``, ``revision``,
        ``lora_model_path``, ``prompt``, ``image_save_path``, ``torch_dtype``,
        ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image inference.')
    parser.add_argument(
        '--base_model_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-v1-5',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--lora_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained lora model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=50,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with a base pipeline plus optional LoRA weights.

    Resolves ``--base_model_path`` (an existing local path is used as is,
    anything else is fetched with ``snapshot_download``), loads a
    ``DiffusionPipeline`` in the requested precision, switches the scheduler
    to ``DPMSolverMultistepScheduler``, optionally replaces ``pipe.unet`` with
    the result of ``Swift.from_pretrained``, runs the prompt on CUDA and
    saves the first returned image to ``--image_save_path``.
    """
    args = parse_args()

    if os.path.exists(args.base_model_path):
        base_model_path = args.base_model_path
    else:
        base_model_path = snapshot_download(args.base_model_path, revision=args.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    if args.torch_dtype == 'fp16':
        torch_dtype = torch.float16
    elif args.torch_dtype == 'bf16':
        torch_dtype = torch.bfloat16
    else:
        torch_dtype = torch.float32

    pipe = DiffusionPipeline.from_pretrained(base_model_path, torch_dtype=torch_dtype)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    if args.lora_model_path is not None:
        pipe.unet = Swift.from_pretrained(pipe.unet, args.lora_model_path)
    pipe.to('cuda')

    # --guidance_scale was parsed but never forwarded, so the flag had no
    # effect; pass it through like the other inference scripts do.
    image = pipe(
        args.prompt,
        num_inference_steps=args.num_inference_steps,
        guidance_scale=args.guidance_scale,
    ).images[0]
    image.save(args.image_save_path)
swift-main/swift/aigc/diffusers/infer_dreambooth_lora_sdxl.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
DiffusionPipeline
from
modelscope
import
snapshot_download
from
swift
import
Swift
def parse_args():
    """Parse the command-line options for DreamBooth LoRA SDXL inference.

    ``--base_model_path`` is optional and falls back to its default; it was
    previously marked required, which made the default unreachable.

    Returns:
        argparse.Namespace: carries ``base_model_path``, ``revision``,
        ``lora_model_path``, ``prompt``, ``image_save_path``, ``torch_dtype``,
        ``seed``, ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image inference.')
    parser.add_argument(
        '--base_model_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-xl-base-1.0',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--lora_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained lora model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument('--seed', type=int, default=None, help='A seed for inference.')
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=50,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with an SDXL base pipeline plus optional LoRA weights.

    Resolves ``--base_model_path`` (an existing local path is used as is,
    anything else is fetched with ``snapshot_download``), loads a
    ``DiffusionPipeline`` in the requested precision, optionally replaces
    ``pipe.unet`` with the result of ``Swift.from_pretrained``, runs the
    prompt on CUDA and saves the first returned image to
    ``--image_save_path``.
    """
    args = parse_args()

    if os.path.exists(args.base_model_path):
        base_model_path = args.base_model_path
    else:
        base_model_path = snapshot_download(args.base_model_path, revision=args.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    if args.torch_dtype == 'fp16':
        torch_dtype = torch.float16
    elif args.torch_dtype == 'bf16':
        torch_dtype = torch.bfloat16
    else:
        torch_dtype = torch.float32

    pipe = DiffusionPipeline.from_pretrained(base_model_path, torch_dtype=torch_dtype)
    if args.lora_model_path is not None:
        pipe.unet = Swift.from_pretrained(pipe.unet, args.lora_model_path)
    pipe = pipe.to('cuda')

    # --seed was parsed but never used; build a seeded generator only when a
    # seed is given so unseeded runs keep their previous (random) behaviour.
    generator = None
    if args.seed is not None:
        generator = torch.Generator(device='cuda').manual_seed(args.seed)

    # --guidance_scale was parsed but never forwarded, so the flag had no
    # effect. NOTE(review): the pipeline now receives the CLI default (7.5)
    # instead of the pipeline's own default when the flag is omitted.
    image = pipe(
        args.prompt,
        num_inference_steps=args.num_inference_steps,
        guidance_scale=args.guidance_scale,
        generator=generator,
    ).images[0]
    image.save(args.image_save_path)
swift-main/swift/aigc/diffusers/infer_text_to_image.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
StableDiffusionPipeline
,
UNet2DConditionModel
from
modelscope
import
snapshot_download
def parse_args():
    """Parse the command-line options for text-to-image inference.

    ``--pretrained_model_name_or_path`` is optional and falls back to its
    default; it was previously marked required, which made the default
    unreachable.

    Returns:
        argparse.Namespace: carries ``pretrained_model_name_or_path``,
        ``revision``, ``unet_model_path``, ``prompt``, ``image_save_path``,
        ``torch_dtype``, ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image inference.')
    parser.add_argument(
        '--pretrained_model_name_or_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-v1-5',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--unet_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained unet model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=50,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with a Stable Diffusion pipeline and save it.

    The model location is used as is when it exists on disk, otherwise it
    is fetched with ``snapshot_download``. When ``--unet_model_path`` is
    given, the pipeline's UNet is replaced by one loaded from that path.
    The pipeline runs on CUDA and the first returned image is written to
    ``--image_save_path``.
    """
    opts = parse_args()

    source = opts.pretrained_model_name_or_path
    model_dir = source if os.path.exists(source) else snapshot_download(source, revision=opts.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    precision = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
    }.get(opts.torch_dtype, torch.float32)

    pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=precision)
    if opts.unet_model_path is not None:
        pipeline.unet = UNet2DConditionModel.from_pretrained(opts.unet_model_path, torch_dtype=precision)
    pipeline.to('cuda')

    outputs = pipeline(
        prompt=opts.prompt,
        num_inference_steps=opts.num_inference_steps,
        guidance_scale=opts.guidance_scale,
    )
    first_image = outputs.images[0]
    first_image.save(opts.image_save_path)
swift-main/swift/aigc/diffusers/infer_text_to_image_lora.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
StableDiffusionPipeline
from
modelscope
import
snapshot_download
from
swift
import
Swift
def parse_args():
    """Parse the command-line options for text-to-image LoRA inference.

    ``--pretrained_model_name_or_path`` is optional and falls back to its
    default; it was previously marked required, which made the default
    unreachable.

    Returns:
        argparse.Namespace: carries ``pretrained_model_name_or_path``,
        ``revision``, ``lora_model_path``, ``prompt``, ``image_save_path``,
        ``torch_dtype``, ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image inference.')
    parser.add_argument(
        '--pretrained_model_name_or_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-v1-5',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--lora_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained lora model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=30,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with Stable Diffusion plus optional LoRA weights.

    The model location is used as is when it exists on disk, otherwise it
    is fetched with ``snapshot_download``. When ``--lora_model_path`` is
    given, ``pipe.unet`` is replaced with the result of
    ``Swift.from_pretrained``. The pipeline runs on CUDA and the first
    returned image is written to ``--image_save_path``.
    """
    opts = parse_args()

    source = opts.pretrained_model_name_or_path
    model_dir = source if os.path.exists(source) else snapshot_download(source, revision=opts.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    precision = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
    }.get(opts.torch_dtype, torch.float32)

    pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=precision)
    if opts.lora_model_path is not None:
        pipeline.unet = Swift.from_pretrained(pipeline.unet, opts.lora_model_path)
    pipeline.to('cuda')

    outputs = pipeline(
        prompt=opts.prompt,
        num_inference_steps=opts.num_inference_steps,
        guidance_scale=opts.guidance_scale,
    )
    first_image = outputs.images[0]
    first_image.save(opts.image_save_path)
swift-main/swift/aigc/diffusers/infer_text_to_image_lora_sdxl.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
StableDiffusionXLPipeline
,
UNet2DConditionModel
from
modelscope
import
snapshot_download
from
swift
import
Swift
def parse_args():
    """Parse the command-line options for text-to-image LoRA SDXL inference.

    ``--pretrained_model_name_or_path`` is optional and falls back to its
    default; it was previously marked required, which made the default
    unreachable. The default now names an SDXL base checkpoint, because the
    inference entry point in this file loads ``StableDiffusionXLPipeline``.

    Returns:
        argparse.Namespace: carries ``pretrained_model_name_or_path``,
        ``revision``, ``lora_model_path``, ``prompt``, ``image_save_path``,
        ``torch_dtype``, ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image lora sdxl inference.')
    parser.add_argument(
        '--pretrained_model_name_or_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-xl-base-1.0',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--lora_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained lora model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=30,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with an SDXL pipeline plus optional LoRA weights.

    The model location is used as is when it exists on disk, otherwise it
    is fetched with ``snapshot_download``. When ``--lora_model_path`` is
    given, ``pipe.unet`` is replaced with the result of
    ``Swift.from_pretrained``. The pipeline runs on CUDA and the first
    returned image is written to ``--image_save_path``.
    """
    opts = parse_args()

    source = opts.pretrained_model_name_or_path
    model_dir = source if os.path.exists(source) else snapshot_download(source, revision=opts.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    precision = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
    }.get(opts.torch_dtype, torch.float32)

    pipeline = StableDiffusionXLPipeline.from_pretrained(model_dir, torch_dtype=precision)
    if opts.lora_model_path is not None:
        pipeline.unet = Swift.from_pretrained(pipeline.unet, opts.lora_model_path)
    pipeline.to('cuda')

    outputs = pipeline(
        prompt=opts.prompt,
        num_inference_steps=opts.num_inference_steps,
        guidance_scale=opts.guidance_scale,
    )
    first_image = outputs.images[0]
    first_image.save(opts.image_save_path)
swift-main/swift/aigc/diffusers/infer_text_to_image_sdxl.py
0 → 100644
View file @
1bfbcff0
# Copyright (c) Alibaba, Inc. and its affiliates.
import
argparse
import
os
import
torch
from
diffusers
import
DiffusionPipeline
,
UNet2DConditionModel
from
modelscope
import
snapshot_download
def parse_args():
    """Parse the command-line options for text-to-image SDXL inference.

    ``--pretrained_model_name_or_path`` is optional and falls back to its
    default; it was previously marked required, which made the default
    unreachable. The default now names an SDXL base checkpoint to match this
    SDXL script (the old value pointed at Stable Diffusion v1.5).

    Returns:
        argparse.Namespace: carries ``pretrained_model_name_or_path``,
        ``revision``, ``unet_model_path``, ``prompt``, ``image_save_path``,
        ``torch_dtype``, ``num_inference_steps`` and ``guidance_scale``.
    """
    parser = argparse.ArgumentParser(description='Simple example of a text to image inference.')
    parser.add_argument(
        '--pretrained_model_name_or_path',
        type=str,
        default='AI-ModelScope/stable-diffusion-xl-base-1.0',
        required=False,
        help='Path to pretrained model or model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--revision',
        type=str,
        default=None,
        required=False,
        help='Revision of pretrained model identifier from modelscope.cn/models.',
    )
    parser.add_argument(
        '--unet_model_path',
        type=str,
        default=None,
        required=False,
        help='The path to trained unet model.',
    )
    parser.add_argument(
        '--prompt',
        type=str,
        default=None,
        required=True,
        help='The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`',
    )
    parser.add_argument(
        '--image_save_path',
        type=str,
        default=None,
        required=True,
        help='The path to save generated image',
    )
    parser.add_argument(
        '--torch_dtype',
        type=str,
        default=None,
        choices=['no', 'fp16', 'bf16'],
        # The inference entry point in this file falls back to fp32 for
        # anything other than fp16/bf16, so the help says so.
        help=('Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >='
              ' 1.10 and an Nvidia Ampere GPU. Defaults to fp32 when omitted or set to `no`.'),
    )
    parser.add_argument(
        '--num_inference_steps',
        type=int,
        default=30,
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        help=('The number of denoising steps. More denoising steps usually lead to a higher quality image at the '
              'expense of slower inference.'),
    )
    parser.add_argument(
        '--guidance_scale',
        type=float,
        default=7.5,
        help=('A higher guidance scale value encourages the model to generate images closely linked to the text '
              '`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.'),
    )
    return parser.parse_args()
def main():
    """Generate one image with a diffusion pipeline and save it.

    The model location is used as is when it exists on disk, otherwise it
    is fetched with ``snapshot_download``. When ``--unet_model_path`` is
    given, the pipeline's UNet is replaced by one loaded from that path.
    The pipeline runs on CUDA and the first returned image is written to
    ``--image_save_path``.
    """
    opts = parse_args()

    source = opts.pretrained_model_name_or_path
    model_dir = source if os.path.exists(source) else snapshot_download(source, revision=opts.revision)

    # Anything other than fp16/bf16 ('no' or an omitted flag) loads in fp32.
    precision = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
    }.get(opts.torch_dtype, torch.float32)

    pipeline = DiffusionPipeline.from_pretrained(model_dir, torch_dtype=precision)
    if opts.unet_model_path is not None:
        pipeline.unet = UNet2DConditionModel.from_pretrained(opts.unet_model_path, torch_dtype=precision)
    pipeline.to('cuda')

    outputs = pipeline(
        prompt=opts.prompt,
        num_inference_steps=opts.num_inference_steps,
        guidance_scale=opts.guidance_scale,
    )
    first_image = outputs.images[0]
    first_image.save(opts.image_save_path)
Prev
1
…
32
33
34
35
36
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment