Commit 543828c2 authored by 赵小蒙's avatar 赵小蒙
Browse files

Merge remote-tracking branch 'origin/master'

parents 7d04ed6e 840e25d0
...@@ -48,8 +48,8 @@ jobs: ...@@ -48,8 +48,8 @@ jobs:
- name: test_cli - name: test_cli
run: | run: |
echo $GITHUB_WORKSPACE echo $GITHUB_WORKSPACE
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py cd $GITHUB_WORKSPACE && export PYTHONPATH=. && pytest -s -v tests/test_unit.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
notify_to_feishu: notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }} if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
...@@ -449,7 +449,7 @@ def replace_inline_equations(inline_equation_bboxes, raw_text_blocks): ...@@ -449,7 +449,7 @@ def replace_inline_equations(inline_equation_bboxes, raw_text_blocks):
for blk in raw_text_blocks: for blk in raw_text_blocks:
if _is_xin(eqbox, blk["bbox"]): if _is_xin(eqbox, blk["bbox"]):
if not replace_eq_blk(eqinfo, blk): if not replace_eq_blk(eqinfo, blk):
logger.error(f"行内公式没有替换成功:{eqinfo} ") logger.warning(f"行内公式没有替换成功:{eqinfo} ")
else: else:
break break
......
...@@ -17,7 +17,7 @@ def count_folders_and_check_contents(directory): ...@@ -17,7 +17,7 @@ def count_folders_and_check_contents(directory):
folder_count = folder_count + 1 folder_count = folder_count + 1
assert os.listdir(folder_path) is not None assert os.listdir(folder_path) is not None
print (folder_count) print (folder_count)
assert folder_count == 13 assert folder_count == 14
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -21,11 +21,31 @@ class TestCli: ...@@ -21,11 +21,31 @@ class TestCli:
def test_pdf_specify_jsonl(self): def test_pdf_specify_jsonl(self):
""" """
输入jsonl 输入jsonl, 默认方式解析
""" """
cmd = "cd %s && export PYTHONPATH=. && python " cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'" % (code_path)
logging.info(cmd)
common.check_shell(cmd)
common.count_folders_and_check_contents(pdf_res_path)
def test_pdf_specify_jsonl_txt(self):
    """Run the json-command CLI on a jsonl input using the txt method."""
    # The S3 path stays single-quoted so the shell does not interpret '&' or '?'.
    cli_template = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method txt"
    shell_cmd = cli_template % (code_path)
    logging.info(shell_cmd)
    common.check_shell(shell_cmd)
    # Validate the folders produced under the result path.
    common.count_folders_and_check_contents(pdf_res_path)
def test_pdf_specify_jsonl_ocr(self):
    """Run the json-command CLI on a jsonl input using the ocr method."""
    # The S3 path stays single-quoted so the shell does not interpret '&' or '?'.
    cli_template = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method ocr"
    shell_cmd = cli_template % (code_path)
    logging.info(shell_cmd)
    common.check_shell(shell_cmd)
    # Validate the folders produced under the result path.
    common.count_folders_and_check_contents(pdf_res_path)
if __name__ == "__main__": if __name__ == "__main__":
pytest.main() pytest.main()
import pytest import pytest
import os
from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in_or_part_overlap_with_area_ratio, _is_in, \ from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in_or_part_overlap_with_area_ratio, _is_in, \
_is_part_overlap, _left_intersect, _right_intersect, _is_vertical_full_overlap, _is_bottom_full_overlap, \ _is_part_overlap, _left_intersect, _right_intersect, _is_vertical_full_overlap, _is_bottom_full_overlap, \
_is_left_overlap, __is_overlaps_y_exceeds_threshold, calculate_iou, calculate_overlap_area_2_minbox_area_ratio, \ _is_left_overlap, __is_overlaps_y_exceeds_threshold, calculate_iou, calculate_overlap_area_2_minbox_area_ratio, \
...@@ -513,12 +513,17 @@ def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None: ...@@ -513,12 +513,17 @@ def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None:
def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None: def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None:
assert target_num - bbox_distance(box1, box2) < 1 assert target_num - bbox_distance(box1, box2) < 1
@pytest.mark.skip(reason="skip")
# 根据bucket_name获取s3配置ak,sk,endpoint # 根据bucket_name获取s3配置ak,sk,endpoint
def test_get_s3_config() -> None: def test_get_s3_config() -> None:
with open("./s3_config_testdata.json") as f: bucket_name = os.getenv('bucket_name')
contents = f.read() target_data = os.getenv('target_data')
for content in eval(contents): assert convert_string_to_list(target_data) == list(get_s3_config(bucket_name))
bucket_name = content["bucket_name"]
target_data = content["target_data"]
assert target_data == list(get_s3_config(bucket_name))
def convert_string_to_list(s):
    """Split a comma-separated string into a list of trimmed items.

    Single quotes at the start and end of the whole string are removed
    before splitting on commas, and surrounding whitespace is stripped
    from every resulting item.

    Args:
        s: Comma-separated text, optionally wrapped in single quotes. May
            be ``None``, which is what ``os.getenv`` returns for an unset
            environment variable.

    Returns:
        A list of strings, one per comma-separated field. ``None`` input
        yields an empty list.
    """
    if s is None:
        # Avoid AttributeError on ``None.strip`` when the variable is unset.
        return []
    return [item.strip() for item in s.strip("'").split(',')]
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment