Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhougaofeng
magic_pdf
Commits
b327e826
Commit
b327e826
authored
Oct 23, 2024
by
zhougaofeng
Browse files
Update pdf_server.py
parent
2cdfddd2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
6 deletions
+15
-6
magic_pdf/tools/pdf_server.py
magic_pdf/tools/pdf_server.py
+15
-6
No files found.
magic_pdf/tools/pdf_server.py
View file @
b327e826
...
@@ -19,6 +19,10 @@ import configparser
...
@@ -19,6 +19,10 @@ import configparser
app
=
FastAPI
()
app
=
FastAPI
()
method
=
'auto'
method
=
'auto'
logger
.
add
(
"../../parse.log"
,
rotation
=
"10 MB"
,
level
=
"INFO"
,
format
=
"{time} {level} {message}"
,
encoding
=
'utf-8'
,
enqueue
=
True
)
class
ocrRequest
(
BaseModel
):
class
ocrRequest
(
BaseModel
):
path
:
str
path
:
str
output_dir
:
str
output_dir
:
str
...
@@ -57,10 +61,8 @@ def ocr_pdf_serve(args: str):
...
@@ -57,10 +61,8 @@ def ocr_pdf_serve(args: str):
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
]
=
args
.
dcu_id
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
]
=
args
.
dcu_id
config
=
configparser
.
ConfigParser
()
config
=
configparser
.
ConfigParser
()
config
.
read
(
args
.
config_path
)
config
.
read
(
args
.
config_path
)
pdf_server
=
config
.
get
(
'server'
,
'pdf_server'
)
host
=
config
.
get
(
'server'
,
'pdf_host'
)
if
'http'
in
pdf_server
:
port
=
int
(
config
.
get
(
'server'
,
'pdf_port'
))
pdf_server
=
pdf_server
.
split
(
'://'
)[
1
]
host
,
port
=
pdf_server
.
split
(
':'
)[
0
],
int
(
pdf_server
.
split
(
':'
)[
1
])
uvicorn
.
run
(
app
,
host
=
host
,
port
=
port
)
uvicorn
.
run
(
app
,
host
=
host
,
port
=
port
)
@
app
.
post
(
"/pdf_ocr"
)
@
app
.
post
(
"/pdf_ocr"
)
...
@@ -75,7 +77,8 @@ async def pdf_ocr(request: ocrRequest):
...
@@ -75,7 +77,8 @@ async def pdf_ocr(request: ocrRequest):
debug_able
=
False
debug_able
=
False
start_page_id
=
0
start_page_id
=
0
end_page_id
=
None
end_page_id
=
None
logger
.
info
(
f
'method:
{
method
}
,path:
{
path
}
,output_dir
{
output_dir
}
'
)
logger
.
info
(
f
"method:
{
method
}
, path:
{
path
}
, output_dir:
{
output_dir
}
, config_path:
{
request
.
config_path
}
"
)
def
read_fn
(
path
):
def
read_fn
(
path
):
disk_rw
=
DiskReaderWriter
(
os
.
path
.
dirname
(
path
))
disk_rw
=
DiskReaderWriter
(
os
.
path
.
dirname
(
path
))
return
disk_rw
.
read
(
os
.
path
.
basename
(
path
),
AbsReaderWriter
.
MODE_BIN
)
return
disk_rw
.
read
(
os
.
path
.
basename
(
path
),
AbsReaderWriter
.
MODE_BIN
)
...
@@ -100,7 +103,13 @@ async def pdf_ocr(request: ocrRequest):
...
@@ -100,7 +103,13 @@ async def pdf_ocr(request: ocrRequest):
logger
.
exception
(
e
)
logger
.
exception
(
e
)
logger
.
info
(
f
'config_path:
{
request
.
config_path
}
'
)
logger
.
info
(
f
'config_path:
{
request
.
config_path
}
'
)
parse_doc
(
path
,
request
.
config_path
)
try
:
parse_doc
(
path
,
request
.
config_path
)
return
{
"status_code"
:
"200"
,
"message"
:
"PDF parsed successfully"
}
except
Exception
as
e
:
logger
.
exception
(
e
)
# Return error response with error details
raise
HTTPException
(
status_code
=
500
,
detail
=
str
(
e
))
def
main
():
def
main
():
args
=
parse_args
()
args
=
parse_args
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment