Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhougaofeng
magic_pdf
Commits
88e16305
"...git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "3b27acc77eaf112d8521ae22282c619ef9ccf394"
Commit
88e16305
authored
Oct 23, 2024
by
zhougaofeng
Browse files
Update pdf_client.py
parent
bf156ede
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
10 deletions
+14
-10
magic_pdf/parse/pdf_client.py
magic_pdf/parse/pdf_client.py
+14
-10
No files found.
magic_pdf/parse/pdf_client.py
View file @
88e16305
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import
configparser
import
time
import
time
import
requests
import
requests
...
@@ -12,14 +13,15 @@ class ocrPdfClient:
...
@@ -12,14 +13,15 @@ class ocrPdfClient:
def
__init__
(
self
,
api_url
):
def
__init__
(
self
,
api_url
):
self
.
api_url
=
api_url
self
.
api_url
=
api_url
def
ocr_pdf_client
(
self
,
path
,
output_dir
):
def
ocr_pdf_client
(
self
,
path
,
output_dir
,
config_path
):
payload
=
{
payload
=
{
"path"
:
str
(
path
),
"path"
:
str
(
path
),
"output_dir"
:
str
(
output_dir
),
"output_dir"
:
str
(
output_dir
),
'config_path'
:
str
(
config_path
)
}
}
logger
.
info
(
f
'pdf路径:
{
path
}
,输出路径
{
output_dir
}
'
)
logger
.
info
(
f
'pdf
_server:
{
self
.
api_url
}
,pdf
路径:
{
path
}
,输出路径
{
output_dir
}
,配置文件在
{
config_path
}
'
)
response
=
requests
.
post
(
f
"
{
self
.
api_url
}
/pdf_ocr"
,
json
=
payload
)
response
=
requests
.
post
(
f
"
{
self
.
api_url
}
/pdf_ocr"
,
json
=
payload
)
logger
.
info
(
f
'response:
{
response
}
'
)
#
logger.info(f'response:{response}')
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
return
output_dir
return
output_dir
...
@@ -29,10 +31,6 @@ class ocrPdfClient:
...
@@ -29,10 +31,6 @@ class ocrPdfClient:
def
parse_args
():
def
parse_args
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--url'
,
default
=
'http://0.0.0.0:6030'
,
)
parser
.
add_argument
(
parser
.
add_argument
(
'--path'
,
'--path'
,
'-p'
,
'-p'
,
...
@@ -43,14 +41,21 @@ def parse_args():
...
@@ -43,14 +41,21 @@ def parse_args():
'-o'
,
'-o'
,
required
=
True
required
=
True
)
)
parser
.
add_argument
(
'--config_path'
,
default
=
'/home/practice/magic_pdf-main/magic_pdf/config.ini'
,
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
return
args
return
args
def
main
():
def
main
():
args
=
parse_args
()
args
=
parse_args
()
config
=
configparser
.
ConfigParser
()
embedder
=
ocrPdfClient
(
args
.
url
)
config
.
read
(
args
.
config_path
)
pdf_server
=
config
.
get
(
'server'
,
'pdf_server'
)
embedder
=
ocrPdfClient
(
pdf_server
)
doc_analyze_start
=
time
.
time
()
doc_analyze_start
=
time
.
time
()
if
not
os
.
path
.
isabs
(
args
.
output_dir
):
if
not
os
.
path
.
isabs
(
args
.
output_dir
):
...
@@ -79,4 +84,3 @@ def main():
...
@@ -79,4 +84,3 @@ def main():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
()
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment