Commit 88e16305 authored by zhougaofeng
Browse files

Update pdf_client.py

parent bf156ede
# -*- coding: utf-8 -*-
import configparser
import time
import requests
......@@ -12,14 +13,15 @@ class ocrPdfClient:
# Constructor: keeps the given api_url on the instance as self.api_url.
def __init__(self, api_url):
self.api_url = api_url
def ocr_pdf_client(self, path,output_dir):
def ocr_pdf_client(self, path,output_dir,config_path):
payload = {
"path": str(path),
"output_dir": str(output_dir),
'config_path': str(config_path)
}
logger.info(f'pdf路径:{path},输出路径{output_dir}')
logger.info(f'pdf_server:{self.api_url},pdf路径:{path},输出路径{output_dir},配置文件在{config_path}')
response = requests.post(f"{self.api_url}/pdf_ocr", json=payload)
logger.info(f'response:{response}')
#logger.info(f'response:{response}')
if response.status_code == 200:
return output_dir
......@@ -29,10 +31,6 @@ class ocrPdfClient:
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--url',
default='http://0.0.0.0:6030',
)
parser.add_argument(
'--path',
'-p',
......@@ -43,14 +41,21 @@ def parse_args():
'-o',
required=True
)
parser.add_argument(
'--config_path',
default='/home/practice/magic_pdf-main/magic_pdf/config.ini',
)
args = parser.parse_args()
return args
def main():
args = parse_args()
embedder = ocrPdfClient(args.url)
config = configparser.ConfigParser()
config.read(args.config_path)
pdf_server = config.get('server', 'pdf_server')
embedder = ocrPdfClient(pdf_server)
doc_analyze_start = time.time()
if not os.path.isabs(args.output_dir):
......@@ -79,4 +84,3 @@ def main():
if __name__ == "__main__":
main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment