Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhougaofeng
magic_pdf
Commits
4781cd49
Commit
4781cd49
authored
Oct 25, 2024
by
zhougaofeng
Browse files
Update pdf_server.py
parent
68851ae0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
0 deletions
+10
-0
magic_pdf/tools/pdf_server.py
magic_pdf/tools/pdf_server.py
+10
-0
No files found.
magic_pdf/tools/pdf_server.py
View file @
4781cd49
...
@@ -6,6 +6,8 @@ from loguru import logger
...
@@ -6,6 +6,8 @@ from loguru import logger
from
typing
import
List
from
typing
import
List
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
fastapi
import
FastAPI
,
HTTPException
,
Request
import
magic_pdf.model
as
model_config
import
magic_pdf.model
as
model_config
from
magic_pdf.dict2md.ocr_client
import
PredictClient
# from magic_pdf.dict2md.ocr_vllm_client import PredictClient
from
magic_pdf.libs.version
import
__version__
from
magic_pdf.libs.version
import
__version__
from
magic_pdf.rw.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.rw.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
...
@@ -23,6 +25,9 @@ method = 'auto'
...
@@ -23,6 +25,9 @@ method = 'auto'
logger
.
add
(
"parse.log"
,
rotation
=
"10 MB"
,
level
=
"INFO"
,
logger
.
add
(
"parse.log"
,
rotation
=
"10 MB"
,
level
=
"INFO"
,
format
=
"{time} {level} {message}"
,
encoding
=
'utf-8'
,
enqueue
=
True
)
format
=
"{time} {level} {message}"
,
encoding
=
'utf-8'
,
enqueue
=
True
)
config_path
=
None
config_path
=
None
ocr_status
=
None
class
ocrRequest
(
BaseModel
):
class
ocrRequest
(
BaseModel
):
path
:
str
path
:
str
output_dir
:
str
output_dir
:
str
...
@@ -71,6 +76,10 @@ def ocr_pdf_serve(args: str):
...
@@ -71,6 +76,10 @@ def ocr_pdf_serve(args: str):
host
,
port
=
pdf_server
.
split
(
':'
)[
0
],
int
(
pdf_server
.
split
(
':'
)[
1
])
host
,
port
=
pdf_server
.
split
(
':'
)[
0
],
int
(
pdf_server
.
split
(
':'
)[
1
])
global
config_path
global
config_path
config_path
=
args
.
config_path
config_path
=
args
.
config_path
ocr_server
=
config
.
get
(
'server'
,
'ocr_server'
)
ocr_client
=
PredictClient
(
ocr_server
)
global
ocr_status
ocr_status
=
ocr_client
.
check_health
()
uvicorn
.
run
(
app
,
host
=
host
,
port
=
port
)
uvicorn
.
run
(
app
,
host
=
host
,
port
=
port
)
@
app
.
get
(
"/health"
)
@
app
.
get
(
"/health"
)
...
@@ -100,6 +109,7 @@ async def pdf_ocr(request: ocrRequest):
...
@@ -100,6 +109,7 @@ async def pdf_ocr(request: ocrRequest):
file_name
=
str
(
Path
(
doc_path
).
stem
)
file_name
=
str
(
Path
(
doc_path
).
stem
)
pdf_data
=
read_fn
(
doc_path
)
pdf_data
=
read_fn
(
doc_path
)
output_path
=
do_parse
(
output_path
=
do_parse
(
ocr_status
,
config_path
,
config_path
,
output_dir
,
output_dir
,
file_name
,
file_name
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment