Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
vllm-omni-das
Commits
23cdd9a4
Commit
23cdd9a4
authored
Apr 15, 2026
by
chenpangpang
Browse files
add ERNIE-Image OpenAI image generation API endpoint
parent
ee424554
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
188 additions
and
0 deletions
+188
-0
vllm_omni/entrypoints/openai/ernie_image_api_server.py
vllm_omni/entrypoints/openai/ernie_image_api_server.py
+188
-0
No files found.
vllm_omni/entrypoints/openai/ernie_image_api_server.py
0 → 100644
View file @
23cdd9a4
# SPDX-License-Identifier: Apache-2.0
"""Standalone OpenAI-compatible ERNIE-Image API server."""
from __future__ import annotations

import argparse
import asyncio
import base64
import io
import logging
import os
import random
import time
from enum import Enum
from http import HTTPStatus

import numpy as np
import torch
import uvicorn
from diffusers import ErnieImagePipeline
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field, field_validator
# Checkpoint location used when the --model-path command-line option is omitted.
DEFAULT_MODEL_PATH = "/root/private_data/chenyh/models/ERNIE-Image"

# setdefault keeps any value the operator already exported.
# NOTE(review): presumably needed because ErnieImageService._set_seed enables
# torch.use_deterministic_algorithms(True), which requires a cuBLAS workspace
# configuration on CUDA; it is set at import time so it is in place before the
# model is loaded - confirm against the PyTorch reproducibility notes.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
class ResponseFormat(str, Enum):
    # Values accepted for the request's ``response_format`` field.
    # Only inline base64 output is defined; mixing in ``str`` makes the member
    # compare equal to the plain string "b64_json".
    B64_JSON = "b64_json"
class ImageGenerationRequest(BaseModel):
    # Request body for POST /v1/images/generations.
    #
    # ``prompt`` is the only required field. ``size``, ``num_inference_steps``
    # and ``guidance_scale`` fall back to defaults chosen by
    # ErnieImageService.generate when left as None. ``model`` and ``user`` are
    # accepted but not read by ErnieImageService.generate.
    # (Kept as comments rather than a docstring so the generated schema
    # description is unchanged.)

    prompt: str = Field(..., description="Text description of image")
    model: str | None = Field(None, description="Optional model id")
    # Number of images to produce for this request.
    n: int = Field(1, ge=1, le=10)
    size: str | None = Field(None, description="Image dimensions in WIDTHxHEIGHT format")
    response_format: ResponseFormat = ResponseFormat.B64_JSON
    user: str | None = None
    negative_prompt: str | None = None
    num_inference_steps: int | None = Field(None, ge=1, le=200)
    guidance_scale: float | None = Field(None, ge=0.0, le=20.0)
    # Base seed; image ``idx`` of the request is generated with ``seed + idx``.
    seed: int | None = None

    @field_validator("size")
    @classmethod
    def validate_size(cls, value: str | None) -> str | None:
        """Reject a malformed ``size`` and hand the original string back.

        ``parse_size`` is called only for its ``ValueError``; the parsed tuple
        is discarded so the field keeps the caller's text.
        """
        if value is not None:
            parse_size(value)
        return value
class ImageData(BaseModel):
    # One entry of the response ``data`` list. Every field is optional and
    # defaults to None.

    # Base64 text of the encoded image; the field ErnieImageService.generate
    # fills in.
    b64_json: str | None = None
    # Not populated by ErnieImageService.generate.
    url: str | None = None
    # First entry of the pipeline output's ``revised_prompts``, when present.
    revised_prompt: str | None = None
class ImageGenerationResponse(BaseModel):
    # Response body of POST /v1/images/generations.

    # Unix timestamp in whole seconds (ErnieImageService.generate uses
    # int(time.time())).
    created: int
    # One ImageData per generated image.
    data: list[ImageData]
    # Image encoding of the payloads; ErnieImageService.generate sets "png".
    output_format: str | None = None
    # Dimensions used for generation, formatted as "WIDTHxHEIGHT".
    size: str | None = None
def parse_size(size_str: str) -> tuple[int, int]:
    """Parse a ``WIDTHxHEIGHT`` string into a ``(width, height)`` tuple.

    The ``x`` separator is matched case-insensitively, so ``"1024X768"`` is
    accepted in addition to ``"1024x768"``.

    Args:
        size_str: Image dimensions, for example ``"1024x1024"``.

    Returns:
        ``(width, height)`` as positive integers.

    Raises:
        ValueError: If the string does not consist of exactly two parts
            separated by ``x``, if either part is not an integer, or if
            either value is not strictly positive.
    """
    # Lower-casing only affects the separator; digits are unchanged.
    parts = size_str.lower().split("x")
    if len(parts) != 2:
        raise ValueError("size must be in WIDTHxHEIGHT format, for example 1024x1024")

    try:
        width, height = (int(part) for part in parts)
    except ValueError as exc:
        raise ValueError("size width and height must be integers") from exc

    if width <= 0 or height <= 0:
        raise ValueError("size width and height must be positive")
    return width, height
def encode_image_base64(image) -> str:
    """Return ``image`` encoded as PNG and wrapped in base64 text.

    ``image`` must provide ``save(fp, format=...)``; presumably a PIL image
    taken from the pipeline output (TODO confirm).
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        # getvalue() returns the whole buffer regardless of stream position.
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
class ErnieImageService:
    """Serve image-generation requests from one CUDA-resident ``ErnieImagePipeline``.

    Generation is serialized with an ``asyncio.Lock`` and the blocking
    pipeline calls run in a worker thread, so the event loop can keep
    answering other requests (for example ``/healthz``) while images are
    being produced.
    """

    # Entropy source for automatically chosen seeds. It is deliberately
    # separate from the module-level ``random`` generator: ``_set_seed``
    # reseeds that generator before every image, so drawing the next seed
    # from it would make each "random" seed a deterministic function of the
    # previous one (including seeds supplied by earlier callers).
    _SEED_RNG = random.SystemRandom()

    def __init__(self, model_path: str, torch_dtype: str = "bfloat16") -> None:
        """Load the pipeline, move it to CUDA and put its modules in eval mode.

        Args:
            model_path: Location passed to ``ErnieImagePipeline.from_pretrained``.
            torch_dtype: Name of a ``torch`` attribute (for example
                ``"bfloat16"``) used as the loading dtype.
        """
        dtype = getattr(torch, torch_dtype)
        self.model_path = model_path
        self.pipe = ErnieImagePipeline.from_pretrained(model_path, torch_dtype=dtype)
        self.pipe = self.pipe.to("cuda")
        self.pipe.transformer.eval()
        self.pipe.vae.eval()
        self.pipe.text_encoder.eval()
        self.pipe.pe.eval()
        # Only one request may drive the pipeline at a time.
        self._lock = asyncio.Lock()

    @staticmethod
    def _set_seed(seed: int) -> torch.Generator:
        """Seed every RNG in the process and return a seeded CUDA generator.

        Seeds Python's ``random``, NumPy, and torch (CPU and all CUDA
        devices), and switches cuDNN/torch to deterministic behaviour. These
        are process-wide settings and stay in effect after the call.

        Args:
            seed: Seed applied to all generators.

        Returns:
            A ``torch.Generator`` on ``"cuda"`` seeded with ``seed``.
        """
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.use_deterministic_algorithms(True)
        torch.backends.cudnn.benchmark = False
        return torch.Generator(device="cuda").manual_seed(seed)

    def _render_images(
        self,
        request: ImageGenerationRequest,
        width: int,
        height: int,
        steps: int,
        guidance_scale: float,
    ) -> list[ImageData]:
        """Run the pipeline ``request.n`` times and encode each first image.

        Blocking; intended to be called from a worker thread while the
        service lock is held.
        """
        image_data: list[ImageData] = []
        for idx in range(request.n):
            if request.seed is None:
                seed = self._SEED_RNG.randint(0, 2**31 - 1)
            else:
                # Consecutive seeds give distinct but reproducible images.
                seed = request.seed + idx
            generator = self._set_seed(seed)

            # inference_mode must be entered in the thread that runs the model.
            with torch.inference_mode():
                output = self.pipe(
                    prompt=request.prompt,
                    negative_prompt=request.negative_prompt or None,
                    height=height,
                    width=width,
                    num_inference_steps=steps,
                    guidance_scale=guidance_scale,
                    generator=generator,
                )

            revised_prompts = getattr(output, "revised_prompts", None)
            image_data.append(
                ImageData(
                    b64_json=encode_image_base64(output.images[0]),
                    revised_prompt=(revised_prompts[0] if revised_prompts else None),
                )
            )
        return image_data

    async def generate(self, request: ImageGenerationRequest) -> ImageGenerationResponse:
        """Generate ``request.n`` images and return them as base64 PNG data.

        Defaults when the request leaves a field unset: size 1024x1024,
        50 inference steps, guidance scale 5.0.

        Args:
            request: Validated generation request.

        Returns:
            Response holding one ``ImageData`` per generated image.

        Raises:
            ValueError: If ``request.size`` cannot be parsed by ``parse_size``.
            Exception: Whatever seeding or the pipeline raises is propagated.
        """
        width, height = parse_size(request.size) if request.size else (1024, 1024)
        steps = request.num_inference_steps if request.num_inference_steps is not None else 50
        guidance_scale = request.guidance_scale if request.guidance_scale is not None else 5.0

        async with self._lock:
            # The pipeline call blocks for the whole generation; running it in
            # a worker thread keeps the event loop free. A thread cannot be
            # interrupted, so if this task is cancelled the current pipeline
            # call still runs to completion in the background.
            image_data = await asyncio.to_thread(
                self._render_images, request, width, height, steps, guidance_scale
            )

        return ImageGenerationResponse(
            created=int(time.time()),
            data=image_data,
            output_format="png",
            size=f"{width}x{height}",
        )
def build_app(service: ErnieImageService) -> FastAPI:
    """Create the FastAPI application that exposes ``service`` over HTTP.

    Routes:
        GET /healthz: returns ``{"status": "healthy"}``.
        POST /v1/images/generations: validates the request and delegates to
            ``service.generate``.

    Args:
        service: Image service used to fulfil generation requests.

    Returns:
        The configured ``FastAPI`` instance.
    """
    logger = logging.getLogger(__name__)
    app = FastAPI(title="ERNIE-Image OpenAI API", version="0.1.0")

    @app.get("/healthz")
    async def healthz() -> dict[str, str]:
        return {"status": "healthy"}

    @app.post("/v1/images/generations", response_model=ImageGenerationResponse)
    async def generate_images(request: ImageGenerationRequest) -> ImageGenerationResponse:
        # Whitespace-only prompts are treated the same as empty ones.
        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="prompt cannot be empty")
        if request.response_format != ResponseFormat.B64_JSON:
            raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="Only b64_json is supported")

        try:
            return await service.generate(request)
        except HTTPException:
            raise
        except ValueError as exc:
            # Reported to the client as a bad request.
            raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)) from exc
        except Exception as exc:
            # The exception is replaced by an HTTP 500 response below, so log
            # the traceback here or it is lost to operators.
            logger.exception("Image generation failed")
            raise HTTPException(
                status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
                detail=f"Image generation failed: {exc}",
            ) from exc

    return app
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the server's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line, as before.

    Returns:
        Namespace with ``host``, ``port``, ``model_path`` and ``torch_dtype``.
    """
    parser = argparse.ArgumentParser(description="Run ERNIE-Image OpenAI-compatible API server")
    parser.add_argument("--host", default="0.0.0.0", help="Server host")
    parser.add_argument("--port", type=int, default=8091, help="Server port")
    parser.add_argument("--model-path", default=DEFAULT_MODEL_PATH, help="Path to ERNIE-Image model")
    parser.add_argument(
        "--torch-dtype",
        default="bfloat16",
        choices=["float16", "bfloat16", "float32"],
        help="Torch dtype used to load the model",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Entry point: read the CLI options, load the model, serve the API.

    The model is loaded before the HTTP server starts.
    """
    cli = parse_args()
    image_service = ErnieImageService(
        model_path=cli.model_path,
        torch_dtype=cli.torch_dtype,
    )
    uvicorn.run(build_app(image_service), host=cli.host, port=cli.port)
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment