Commit 543828c2 authored by 赵小蒙
Browse files

Merge remote-tracking branch 'origin/master'

parents 7d04ed6e 840e25d0
......@@ -48,8 +48,8 @@ jobs:
- name: test_cli
run: |
echo $GITHUB_WORKSPACE
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && pytest -s -v tests/test_unit.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
......@@ -449,7 +449,7 @@ def replace_inline_equations(inline_equation_bboxes, raw_text_blocks):
for blk in raw_text_blocks:
if _is_xin(eqbox, blk["bbox"]):
if not replace_eq_blk(eqinfo, blk):
logger.error(f"行内公式没有替换成功:{eqinfo} ")
logger.warning(f"行内公式没有替换成功:{eqinfo} ")
else:
break
......
......@@ -17,7 +17,7 @@ def count_folders_and_check_contents(directory):
folder_count = folder_count + 1
assert os.listdir(folder_path) is not None
print (folder_count)
assert folder_count == 13
assert folder_count == 14
if __name__ == "__main__":
......
......@@ -21,11 +21,31 @@ class TestCli:
def test_pdf_specify_jsonl(self):
"""
输入jsonl
输入jsonl, 默认方式解析
"""
cmd = "cd %s && export PYTHONPATH=. && python "
cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'" % (code_path)
logging.info(cmd)
common.check_shell(cmd)
common.count_folders_and_check_contents(pdf_res_path)
def test_pdf_specify_jsonl_txt(self):
    """
    Feed a jsonl input to the magicpdf json-command with ``--method txt``.

    The command is run through ``common.check_shell`` and the result
    directory is then checked with
    ``common.count_folders_and_check_contents``.
    """
    shell_cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method txt" % code_path
    # Log the exact command line before running it.
    logging.info(shell_cmd)
    common.check_shell(shell_cmd)
    common.count_folders_and_check_contents(pdf_res_path)
def test_pdf_specify_jsonl_ocr(self):
    """
    Feed a jsonl input to the magicpdf json-command with ``--method ocr``.

    The command is run through ``common.check_shell`` and the result
    directory is then checked with
    ``common.count_folders_and_check_contents``.
    """
    shell_cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method ocr" % code_path
    # Log the exact command line before running it.
    logging.info(shell_cmd)
    common.check_shell(shell_cmd)
    common.count_folders_and_check_contents(pdf_res_path)
if __name__ == "__main__":
pytest.main()
import pytest
import os
from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in_or_part_overlap_with_area_ratio, _is_in, \
_is_part_overlap, _left_intersect, _right_intersect, _is_vertical_full_overlap, _is_bottom_full_overlap, \
_is_left_overlap, __is_overlaps_y_exceeds_threshold, calculate_iou, calculate_overlap_area_2_minbox_area_ratio, \
......@@ -513,12 +513,17 @@ def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None:
def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None:
assert target_num - bbox_distance(box1, box2) < 1
@pytest.mark.skip(reason="skip")
# 根据bucket_name获取s3配置ak,sk,endpoint
def test_get_s3_config() -> None:
with open("./s3_config_testdata.json") as f:
contents = f.read()
for content in eval(contents):
bucket_name = content["bucket_name"]
target_data = content["target_data"]
assert target_data == list(get_s3_config(bucket_name))
bucket_name = os.getenv('bucket_name')
target_data = os.getenv('target_data')
assert convert_string_to_list(target_data) == list(get_s3_config(bucket_name))
def convert_string_to_list(s):
    """Split a comma-separated string into a list of trimmed items.

    Leading and trailing single-quote characters are removed from the whole
    string before it is split on commas; each item then has its surrounding
    whitespace stripped.

    Args:
        s: Comma-separated text, optionally wrapped in single quotes.
            ``None`` is accepted because ``os.getenv`` returns ``None``
            when the variable is not set.

    Returns:
        The items in input order. ``None`` or blank input yields an empty
        list instead of raising or returning ``['']``.
    """
    # An unset environment variable arrives here as None; treat it as
    # "no items" rather than crashing with AttributeError on .strip().
    if s is None:
        return []
    unquoted = s.strip("'")
    # "".split(',') would produce [''], which is not a meaningful item list.
    if not unquoted.strip():
        return []
    return [item.strip() for item in unquoted.split(',')]
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment