wangsen / MinerU · Commit 802ccd93

refactor: remove multi_gpu project reference from README files

Authored Jul 05, 2025 by myhloli
Parent: 9d20d8d8
Showing 5 changed files with 0 additions and 183 deletions:

- projects/README.md (+0 -1)
- projects/README_zh-CN.md (+0 -1)
- projects/multi_gpu/README.md (+0 -44)
- projects/multi_gpu/client.py (+0 -39)
- projects/multi_gpu/server.py (+0 -98)
projects/README.md

```diff
@@ -3,5 +3,4 @@
 ## Project List
 
 Projects not yet compatible with version 2.0:
-- [multi_gpu](./multi_gpu/README.md): Multi-GPU parallel processing based on LitServe
 - [mcp](./mcp/README.md): MCP server based on the official API
```
projects/README_zh-CN.md

```diff
@@ -3,5 +3,4 @@
 ## Project List
 
 Projects not yet compatible with version 2.0:
-- [multi_gpu](./multi_gpu/README.md): Multi-GPU parallel processing based on LitServe
 - [mcp](./mcp/README.md): MCP server based on the official API
```
projects/multi_gpu/README.md (deleted, 100644 → 0)
## Project Overview

This project provides a multi-GPU parallel processing solution based on LitServe. LitServe is a simple and flexible serving engine for AI models, built on FastAPI. It adds batching, streaming, and GPU autoscaling on top of FastAPI, so there is no need to rebuild a FastAPI server for each model.

## Environment Setup

Set up the required environment with the following command:
```bash
pip install -U magic-pdf[full] litserve python-multipart filetype
```
## Quick Start

### 1. Start the server

The following example shows how to start the server; each setting can be customized:
```python
server = ls.LitServer(
    MinerUAPI(output_dir='/tmp'),  # output directory, customizable
    accelerator='cuda',            # enable GPU acceleration
    devices='auto',                # 'auto' uses all available GPUs
    workers_per_device=1,          # one server instance per GPU
    timeout=False                  # False disables the request timeout
)
server.run(port=8000)              # serve on port 8000
```
Command to start the server:

```bash
python server.py
```
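Once the server logs model initialization on each device, you can optionally confirm it is reachable before sending work. A minimal sketch, assuming the installed LitServe exposes its built-in `/health` route (if yours does not, a request to `/predict` serves the same purpose):

```python
import requests

# Assumption: LitServe's /health route; returns HTTP 200 once workers are ready.
r = requests.get('http://127.0.0.1:8000/health', timeout=5)
print(r.status_code)
```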
### 2. Start the client

The following code shows how to use the client; adjust the configuration as needed:
```python
files = ['demo/small_ocr.pdf']      # replace with your file paths; pdf, jpg/jpeg, png, doc, docx, ppt, and pptx are supported
n_jobs = np.clip(len(files), 1, 8)  # number of concurrent threads, capped at 8 here; adjust to your needs
results = Parallel(n_jobs, prefer='threads', verbose=10)(
    delayed(do_parse)(p) for p in files
)
print(results)
```
Command to start the client:

```bash
python client.py
```

That's it: your files will be processed in parallel across multiple GPUs automatically! 🍻🍻🍻
projects/multi_gpu/client.py (deleted, 100644 → 0)
```python
import base64
import requests
import numpy as np
from loguru import logger
from joblib import Parallel, delayed


def to_b64(file_path):
    """Read a file and return its base64-encoded contents."""
    try:
        with open(file_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')
    except Exception as e:
        raise Exception(f'File: {file_path} - Info: {e}')


def do_parse(file_path, url='http://127.0.0.1:8000/predict', **kwargs):
    """POST one file to the server and return its JSON response."""
    try:
        response = requests.post(url, json={'file': to_b64(file_path), 'kwargs': kwargs})
        if response.status_code == 200:
            output = response.json()
            output['file_path'] = file_path
            return output
        else:
            raise Exception(response.text)
    except Exception as e:
        logger.error(f'File: {file_path} - Info: {e}')


if __name__ == '__main__':
    files = ['demo/small_ocr.pdf']
    # One thread per file, capped at 8 concurrent requests.
    n_jobs = np.clip(len(files), 1, 8)
    results = Parallel(n_jobs, prefer='threads', verbose=10)(
        delayed(do_parse)(p) for p in files
    )
    print(results)
```
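For a quick single-file smoke test without joblib, the same request that `do_parse` builds can be sent directly. A minimal sketch, assuming the server is running on port 8000 (the explicit `timeout` is an illustrative addition, not part of the original client):

```python
import base64
import requests

# Encode one input file exactly as to_b64/do_parse do above.
with open('demo/small_ocr.pdf', 'rb') as f:
    payload = {
        'file': base64.b64encode(f.read()).decode('utf-8'),
        'kwargs': {'parse_method': 'auto'},  # forwarded to do_parse on the server
    }

resp = requests.post('http://127.0.0.1:8000/predict', json=payload, timeout=600)
resp.raise_for_status()
print(resp.json())  # e.g. {'output_dir': '/tmp/<uuid>'}
```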
projects/multi_gpu/server.py (deleted, 100644 → 0)
```python
import os
import uuid
import shutil
import tempfile
import gc
import fitz
import torch
import base64
import filetype
import litserve as ls
from pathlib import Path
from fastapi import HTTPException


class MinerUAPI(ls.LitAPI):
    def __init__(self, output_dir='/tmp'):
        self.output_dir = Path(output_dir)

    def setup(self, device):
        # Pin this worker to its assigned GPU before any CUDA work happens.
        if device.startswith('cuda'):
            os.environ['CUDA_VISIBLE_DEVICES'] = device.split(':')[-1]
            if torch.cuda.device_count() > 1:
                raise RuntimeError("Remove any CUDA actions before setting 'CUDA_VISIBLE_DEVICES'.")

        from magic_pdf.tools.cli import do_parse, convert_file_to_pdf
        from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton

        self.do_parse = do_parse
        self.convert_file_to_pdf = convert_file_to_pdf

        # Warm up both model pipelines so the first request pays no load cost.
        model_manager = ModelSingleton()
        model_manager.get_model(True, False)
        model_manager.get_model(False, False)
        print(f'Model initialization complete on {device}!')

    def decode_request(self, request):
        file = request['file']
        file = self.cvt2pdf(file)  # normalize any supported input to PDF bytes
        opts = request.get('kwargs', {})
        opts.setdefault('debug_able', False)
        opts.setdefault('parse_method', 'auto')
        return file, opts

    def predict(self, inputs):
        try:
            pdf_name = str(uuid.uuid4())
            output_dir = self.output_dir.joinpath(pdf_name)
            self.do_parse(self.output_dir, pdf_name, inputs[0], [], **inputs[1])
            return output_dir
        except Exception as e:
            shutil.rmtree(output_dir, ignore_errors=True)
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            self.clean_memory()

    def encode_response(self, response):
        return {'output_dir': response}

    def clean_memory(self):
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        gc.collect()

    def cvt2pdf(self, file_base64):
        try:
            temp_dir = Path(tempfile.mkdtemp())
            temp_file = temp_dir.joinpath('tmpfile')
            file_bytes = base64.b64decode(file_base64)
            file_ext = filetype.guess_extension(file_bytes)

            if file_ext in ['pdf', 'jpg', 'png', 'doc', 'docx', 'ppt', 'pptx']:
                if file_ext == 'pdf':
                    return file_bytes
                elif file_ext in ['jpg', 'png']:
                    # Wrap images into a PDF with PyMuPDF.
                    with fitz.open(stream=file_bytes, filetype=file_ext) as f:
                        return f.convert_to_pdf()
                else:
                    # Office formats go through magic_pdf's convert_file_to_pdf helper.
                    temp_file.write_bytes(file_bytes)
                    self.convert_file_to_pdf(temp_file, temp_dir)
                    return temp_file.with_suffix('.pdf').read_bytes()
            else:
                raise Exception('Unsupported file format')
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == '__main__':
    server = ls.LitServer(
        MinerUAPI(output_dir='/tmp'),
        accelerator='cuda',
        devices='auto',
        workers_per_device=1,
        timeout=False
    )
    server.run(port=8000)
```
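The image branch of `cvt2pdf` leans on PyMuPDF's `Document.convert_to_pdf`, which returns the opened document re-rendered as PDF bytes. A standalone sketch of just that step, with `page.png` as a hypothetical input file:

```python
import fitz  # PyMuPDF

# Hypothetical input image; any jpg/png bytes work the same way.
with open('page.png', 'rb') as f:
    img_bytes = f.read()

# Mirror the jpg/png branch of cvt2pdf: open the raw bytes as an image
# document, then wrap it into a single-page PDF.
with fitz.open(stream=img_bytes, filetype='png') as doc:
    pdf_bytes = doc.convert_to_pdf()

with open('page.pdf', 'wb') as out:
    out.write(pdf_bytes)
```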