# test_cli.py
import pytest
import os
from conf import conf
import subprocess
from lib import common
import logging
# Paths used by the CLI tests, looked up once from the shared test configuration.
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf[key] for key in ("pdf_res_path", "code_path", "pdf_dev_path")
)
class TestCli:
    """Shell-driven tests for the ``magic_pdf/cli/magicpdf.py`` command line."""

    def _run_json_command(self, extra_args=""):
        """
        Build, log and execute the ``json-command`` invocation.

        ``extra_args`` is appended verbatim to the end of the shell command
        (for example ``" --method txt"``); the default adds nothing.
        """
        shell_line = (
            "cd %s && export PYTHONPATH=. && "
            "python magic_pdf/cli/magicpdf.py  json-command --json "
            "'s3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'"
        ) % code_path + extra_args
        logging.info(shell_line)
        common.check_shell(shell_line)
        # Result-folder verification is currently switched off:
        # common.count_folders_and_check_contents(pdf_res_path)

    def test_pdf_specify_dir(self):
        """
        Feed PDFs from a given directory: run ``pdf-command`` on every
        ``*.pdf`` file found under ``pdf_dev_path``.
        """
        shell_line = (
            'cd %s && export PYTHONPATH=. && '
            'find %s -type f -name "*.pdf" | '
            'xargs -I{} python magic_pdf/cli/magicpdf.py  pdf-command  --pdf {}'
        ) % (code_path, pdf_dev_path)
        logging.info(shell_line)
        common.check_shell(shell_line)
        # Result-folder verification is currently switched off:
        # common.count_folders_and_check_contents(pdf_res_path)

    def test_pdf_specify_jsonl(self):
        """
        Feed a jsonl input and parse it with the default method.
        """
        self._run_json_command()

    def test_pdf_specify_jsonl_txt(self):
        """
        Feed a jsonl input and parse it with the txt method.
        """
        self._run_json_command(" --method txt")

    def test_pdf_specify_jsonl_ocr(self):
        """
        Feed a jsonl input and parse it with the ocr method.
        """
        self._run_json_command(" --method ocr")
 
# Allow the module to be executed directly as a script: hand control to pytest.
if __name__ == "__main__":
    pytest.main()