Commit 01420850 authored by quyuan
Browse files

feat: add magic-pdf-dev case

parent fea2b7bd
...@@ -43,7 +43,7 @@ jobs: ...@@ -43,7 +43,7 @@ jobs:
cd $GITHUB_WORKSPACE && python tests/get_coverage.py cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test - name: cli test
run: | run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py source ~/.bashrc && cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
notify_to_feishu: notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }} if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
...@@ -3,5 +3,6 @@ conf = { ...@@ -3,5 +3,6 @@ conf = {
"code_path": os.environ.get('GITHUB_WORKSPACE'), "code_path": os.environ.get('GITHUB_WORKSPACE'),
"pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev", "pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev",
"pdf_res_path": "/tmp/magic-pdf", "pdf_res_path": "/tmp/magic-pdf",
"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl" "jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl",
"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test.pdf"
} }
\ No newline at end of file
...@@ -9,7 +9,7 @@ from lib import common ...@@ -9,7 +9,7 @@ from lib import common
import magic_pdf.model as model_config import magic_pdf.model as model_config
from magic_pdf.pipe.UNIPipe import UNIPipe from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from magic_pdf.rw.S3ReaderWriter import S3ReaderWriter
model_config.__use_inside_model__ = True model_config.__use_inside_model__ = True
pdf_res_path = conf.conf['pdf_res_path'] pdf_res_path = conf.conf['pdf_res_path']
code_path = conf.conf['code_path'] code_path = conf.conf['code_path']
...@@ -248,6 +248,24 @@ class TestCli: ...@@ -248,6 +248,24 @@ class TestCli:
os.system(cmd) os.system(cmd)
@pytest.mark.P1
def test_s3_sdk_suto(self):
    """Run the UNIPipe SDK flow end to end on a PDF fetched from S3.

    Credentials, endpoint and bucket come from the environment variables
    ``pdf_ak``, ``pdf_sk``, ``pdf_endpoint`` and ``bucket``; each falls
    back to an empty string when unset. The PDF location comes from
    ``conf.conf["s3_pdf_path"]``. The test passes when the generated
    markdown content is non-empty.
    """
    access_key, secret_key, endpoint = (
        os.environ.get(env_name, "")
        for env_name in ('pdf_ak', 'pdf_sk', 'pdf_endpoint')
    )
    bucket_name = os.environ.get('bucket', "")

    source_pdf = conf.conf["s3_pdf_path"]
    # The same S3 location serves as the writer's parent path and as the
    # image directory handed to the markdown step.
    image_dir = "s3://" + bucket_name + "/mineru/test/test.md"

    # One client without a parent path for reading the PDF, one rooted at
    # image_dir that is handed to the pipe (presumably for image output --
    # confirm against UNIPipe).
    pdf_reader = S3ReaderWriter(access_key, secret_key, endpoint)
    image_writer = S3ReaderWriter(
        access_key, secret_key, endpoint, parent_path=image_dir
    )

    raw_pdf = pdf_reader.read(source_pdf, mode=pdf_reader.MODE_BIN)

    # Empty type and model list are passed in; the classify/analyze steps
    # below run before parsing.
    useful_keys = {"_pdf_type": "", "model_list": []}
    pipe = UNIPipe(raw_pdf, useful_keys, image_writer)
    for stage in (pipe.pipe_classify, pipe.pipe_analyze, pipe.pipe_parse):
        stage()

    markdown = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
    assert len(markdown) > 0
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment