test_cli_sdk.py 19.1 KB
Newer Older
yyy's avatar
yyy committed
1
2
3
4
5
6
"""test cli and sdk."""
import logging
import os
import pytest
from conf import conf
from lib import common
yyy's avatar
yyy committed
7
import time
yyy's avatar
yyy committed
8
import magic_pdf.model as model_config
dt-yy's avatar
dt-yy committed
9
10
from magic_pdf.data.read_api import read_local_images
from magic_pdf.data.read_api import read_local_office
dt-yy's avatar
dt-yy committed
11
12
from magic_pdf.data.data_reader_writer import S3DataReader, S3DataWriter
from magic_pdf.config.make_content_config import DropMode, MakeMode
dt-yy's avatar
dt-yy committed
13
14
15
16
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.config.enums import SupportedPdfParseMethod
yyy's avatar
yyy committed
17
18
19
# Paths read from the shared test configuration mapping (conf.conf).
pdf_res_path = conf.conf['pdf_res_path']
code_path = conf.conf['code_path']
pdf_dev_path = conf.conf['pdf_dev_path']
quyuan's avatar
quyuan committed
20
# Config file that the tests below rewrite through common.update_config_file.
# NOTE(review): hard-coded absolute path under one user's home directory —
# presumably the file the magic-pdf CLI reads; confirm before running elsewhere.
magic_pdf_config = "/home/quyuan/magic-pdf.json"
yyy's avatar
yyy committed
21
22
23

class TestCli:
    """test cli."""
quyuan's avatar
quyuan committed
24
25
26
27
28
29
30
31
32
    @pytest.fixture(autouse=True)
    def setup(self):
        """Autouse fixture executed before every test of this class.

        Calls ``common.clear_gpu_memory()`` and then asks
        ``common.update_config_file`` to store ``"cuda"`` under the
        ``device-mode`` key of ``magic_pdf_config``. Nothing runs after the
        ``yield``, so there is no teardown step.
        """
        common.clear_gpu_memory()
        common.update_config_file(magic_pdf_config, "device-mode", "cuda")
        # Any additional pre-test setup can be added here.
        yield
yyy's avatar
yyy committed
33
34

    @pytest.mark.P0
dt-yy's avatar
dt-yy committed
35
    def test_pdf_local_sdk(self):
        """Run the SDK pipeline end to end on every local sample PDF.

        Every ``*.pdf`` found in ``<pdf_dev_path>/pdf`` is read, classified,
        analysed in OCR or text mode accordingly, and then all draw/get/dump
        outputs are produced under ``<pdf_dev_path>/mineru``. Finally
        ``common.sdk_count_folders_and_check_contents`` is called on that
        output directory.
        """
        pdf_dir = os.path.join(pdf_dev_path, 'pdf')
        # These paths do not depend on the sample, so compute them once.
        local_image_dir = os.path.join(pdf_dir, 'images')
        image_dir = str(os.path.basename(local_image_dir))
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        # splitext keeps dots that are part of the stem: "a.b.pdf" -> "a.b".
        # The former split('.')[0] produced "a" and then looked for "a.pdf".
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_dir)
            if pdf_file.endswith('.pdf')
        ]
        for demo_name in demo_names:
            pdf_path = os.path.join(pdf_dir, f'{demo_name}.pdf')
            name_without_suff = demo_name
            image_writer = FileBasedDataWriter(local_image_dir)
            md_writer = FileBasedDataWriter(dir_path)
            pdf_bytes = FileBasedDataReader("").read(pdf_path)
            ds = PymuDocDataset(pdf_bytes)

            # Inference + pipeline, in the mode chosen by the classifier.
            if ds.classify() == SupportedPdfParseMethod.OCR:
                infer_result = ds.apply(doc_analyze, ocr=True)
                pipe_result = infer_result.pipe_ocr_mode(image_writer)
            else:
                infer_result = ds.apply(doc_analyze, ocr=False)
                pipe_result = infer_result.pipe_txt_mode(image_writer)

            # Clear the output directory only after inference, as before.
            common.delete_file(dir_path)

            # Draw the model result on each page.
            infer_result.draw_model(os.path.join(dir_path, f"{name_without_suff}_model.pdf"))
            # The getters below are called only to exercise them; their
            # return values are not inspected by this test.
            infer_result.get_infer_res()

            # Draw layout and span results on each page.
            pipe_result.draw_layout(os.path.join(dir_path, f"{name_without_suff}_layout.pdf"))
            pipe_result.draw_span(os.path.join(dir_path, f"{name_without_suff}_spans.pdf"))

            # Markdown: get, then dump.
            pipe_result.get_markdown(image_dir)
            pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)

            # Content list: get, then dump.
            pipe_result.get_content_list(image_dir)
            pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)

            # Middle json: get, then dump.
            pipe_result.get_middle_json()
            pipe_result.dump_middle_json(md_writer, f'{name_without_suff}_middle.json')

            common.sdk_count_folders_and_check_contents(dir_path)

    @pytest.mark.P0
    def test_pdf_s3_sdk(self):
        """Placeholder for the S3-backed SDK test.

        Only the sample directory ``<pdf_dev_path>/pdf`` is listed; no
        document is processed and nothing is asserted. The listing is kept so
        that a missing sample directory still raises, as it did before.
        """
        pdf_dir = os.path.join(pdf_dev_path, 'pdf')
        # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b").
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_dir)
            if pdf_file.endswith('.pdf')
        ]
        for _demo_name in demo_names:
            # TODO(review): the per-sample S3 read/write round trip was never
            # implemented; the dead path locals computed here were removed.
            pass
dt-yy's avatar
dt-yy committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187

    @pytest.mark.P0
    def test_pdf_local_ppt(self):
        """Convert every local ``.pptx`` sample to markdown through the SDK.

        Each file in ``<pdf_dev_path>/ppt`` is loaded with
        ``read_local_office``, analysed with ``ocr=True``, piped in text mode
        and dumped as ``<stem>.md`` into ``<pdf_dev_path>/mineru``, which is
        then passed to ``common.sdk_count_folders_and_check_contents``.
        """
        ppt_dir = os.path.join(pdf_dev_path, 'ppt')
        # Output locations are the same for every sample.
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        local_image_dir = os.path.join(dir_path, 'images')
        image_dir = str(os.path.basename(local_image_dir))
        # splitext keeps dots inside the stem; split('.')[0] truncated
        # "a.b.pptx" to "a" and the file was then not found.
        demo_names = [
            os.path.splitext(ppt_file)[0]
            for ppt_file in os.listdir(ppt_dir)
            if ppt_file.endswith('.pptx')
        ]
        for demo_name in demo_names:
            pdf_path = os.path.join(ppt_dir, f'{demo_name}.pptx')
            image_writer = FileBasedDataWriter(local_image_dir)
            md_writer = FileBasedDataWriter(dir_path)
            ds = read_local_office(pdf_path)[0]
            # The output directory is cleared after loading, as before.
            common.delete_file(dir_path)

            ds.apply(doc_analyze, ocr=True).pipe_txt_mode(image_writer).dump_md(
                md_writer, f"{demo_name}.md", image_dir)
            common.sdk_count_folders_and_check_contents(dir_path)



    @pytest.mark.P0
    def test_pdf_local_image(self):
        """Convert every local ``.jpg`` sample to markdown through the SDK.

        Each file in ``<pdf_dev_path>/images`` is loaded with
        ``read_local_images``, analysed with ``ocr=True``, piped in OCR mode
        and dumped as ``<stem>.md`` into ``<pdf_dev_path>/mineru``, which is
        then passed to ``common.sdk_count_folders_and_check_contents``.
        """
        image_src_dir = os.path.join(pdf_dev_path, 'images')
        # Output locations are the same for every sample.
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        local_image_dir = os.path.join(dir_path, 'images')
        image_dir = str(os.path.basename(local_image_dir))
        # splitext keeps dots inside the stem; split('.')[0] truncated
        # "a.b.jpg" to "a" and the file was then not found.
        demo_names = [
            os.path.splitext(image_file)[0]
            for image_file in os.listdir(image_src_dir)
            if image_file.endswith('.jpg')
        ]
        for demo_name in demo_names:
            pdf_path = os.path.join(image_src_dir, f'{demo_name}.jpg')
            # Here the output directory is cleared before the writers exist.
            common.delete_file(dir_path)
            image_writer = FileBasedDataWriter(local_image_dir)
            md_writer = FileBasedDataWriter(dir_path)
            ds = read_local_images(pdf_path)[0]
            ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
                md_writer, f"{demo_name}.md", image_dir)
            common.sdk_count_folders_and_check_contents(dir_path)


    @pytest.mark.P0
    def test_local_image_dir(self):
        """Convert a whole directory of images to markdown through the SDK.

        ``read_local_images`` is given ``<pdf_dev_path>/images`` with the
        suffixes ``.png`` and ``.jpg``; each returned dataset is analysed with
        ``ocr=True``, piped in OCR mode and dumped as ``<index>.md`` into
        ``<pdf_dev_path>/mineru``. The output directory is checked once at
        the end with ``common.sdk_count_folders_and_check_contents``.
        """
        pdf_path = os.path.join(pdf_dev_path, 'images')
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        local_image_dir = os.path.join(pdf_dev_path, 'mineru', 'images')
        image_dir = str(os.path.basename(local_image_dir))
        image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(dir_path)
        common.delete_file(dir_path)
        dss = read_local_images(pdf_path, suffixes=['.png', '.jpg'])
        # enumerate replaces the hand-maintained counter; numbering still
        # starts at 0, so the markdown file names are unchanged.
        for index, ds in enumerate(dss):
            ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
                md_writer, f"{index}.md", image_dir)
        common.sdk_count_folders_and_check_contents(dir_path)

    def test_local_doc_parse(self):
        """Convert every local ``.docx`` sample to markdown through the SDK.

        Each file in ``<pdf_dev_path>/doc`` is loaded with
        ``read_local_office``, analysed with ``ocr=True``, piped in text mode
        and dumped as ``<stem>.md`` into ``<pdf_dev_path>/mineru``, which is
        then passed to ``common.sdk_count_folders_and_check_contents``.
        """
        doc_dir = os.path.join(pdf_dev_path, 'doc')
        # Output locations are the same for every sample.
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        local_image_dir = os.path.join(dir_path, 'images')
        image_dir = str(os.path.basename(local_image_dir))
        # splitext keeps dots inside the stem; split('.')[0] truncated
        # "a.b.docx" to "a" and the file was then not found.
        demo_names = [
            os.path.splitext(doc_file)[0]
            for doc_file in os.listdir(doc_dir)
            if doc_file.endswith('.docx')
        ]
        for demo_name in demo_names:
            pdf_path = os.path.join(doc_dir, f'{demo_name}.docx')
            image_writer = FileBasedDataWriter(local_image_dir)
            md_writer = FileBasedDataWriter(dir_path)
            ds = read_local_office(pdf_path)[0]
            # The output directory is cleared after loading, as before.
            common.delete_file(dir_path)

            ds.apply(doc_analyze, ocr=True).pipe_txt_mode(image_writer).dump_md(
                md_writer, f"{demo_name}.md", image_dir)
            common.sdk_count_folders_and_check_contents(dir_path)


yyy's avatar
yyy committed
188
189
190
    @pytest.mark.P0
    def test_pdf_cli_auto(self):
        """Run the ``magic-pdf`` CLI with ``-m auto`` on every sample PDF.

        For each ``*.pdf`` in ``<pdf_dev_path>/pdf`` the output directory
        ``<pdf_dev_path>/mineru`` is cleared, the CLI is invoked, and
        ``common.cli_count_folders_and_check_contents`` is called on
        ``<output>/<stem>/auto``.
        """
        time.sleep(2)
        pdf_path = os.path.join(pdf_dev_path, 'pdf')
        res_path = os.path.join(pdf_dev_path, 'mineru')
        # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b"); the old
        # split('.')[0] produced "a" and pointed the CLI at a missing file.
        # NOTE(review): assumes the CLI names its output folder after the
        # full file stem — confirm.
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_path)
            if pdf_file.endswith('.pdf')
        ]
        for demo_name in demo_names:
            common.delete_file(res_path)
            cmd = 'magic-pdf -p %s -o %s -m %s' % (os.path.join(
                pdf_path, f'{demo_name}.pdf'), res_path, 'auto')
            logging.info(cmd)
            os.system(cmd)
            common.cli_count_folders_and_check_contents(
                os.path.join(res_path, demo_name, 'auto'))
dt-yy's avatar
dt-yy committed
206
  
yyy's avatar
yyy committed
207
    @pytest.mark.P0
yyy's avatar
yyy committed
208
    def test_pdf_cli_txt(self):
        """Run the ``magic-pdf`` CLI with ``-m txt`` on every sample PDF.

        For each ``*.pdf`` in ``<pdf_dev_path>/pdf`` the output directory
        ``<pdf_dev_path>/mineru`` is cleared, the CLI is invoked, and
        ``common.cli_count_folders_and_check_contents`` is called on
        ``<output>/<stem>/txt``.
        """
        time.sleep(2)
        pdf_path = os.path.join(pdf_dev_path, 'pdf')
        res_path = os.path.join(pdf_dev_path, 'mineru')
        # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b"); the old
        # split('.')[0] produced "a" and pointed the CLI at a missing file.
        # NOTE(review): assumes the CLI names its output folder after the
        # full file stem — confirm.
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_path)
            if pdf_file.endswith('.pdf')
        ]
        for demo_name in demo_names:
            common.delete_file(res_path)
            cmd = 'magic-pdf -p %s -o %s -m %s' % (os.path.join(
                pdf_path, f'{demo_name}.pdf'), res_path, 'txt')
            logging.info(cmd)
            os.system(cmd)
            common.cli_count_folders_and_check_contents(
                os.path.join(res_path, demo_name, 'txt'))
yyy's avatar
yyy committed
225
   
yyy's avatar
yyy committed
226
    @pytest.mark.P0
yyy's avatar
yyy committed
227
    def test_pdf_cli_ocr(self):
        """Run the ``magic-pdf`` CLI with ``-m ocr`` on every sample PDF.

        For each ``*.pdf`` in ``<pdf_dev_path>/pdf`` the output directory
        ``<pdf_dev_path>/mineru`` is cleared, the CLI is invoked, and
        ``common.cli_count_folders_and_check_contents`` is called on
        ``<output>/<stem>/ocr``.
        """
        time.sleep(2)
        pdf_path = os.path.join(pdf_dev_path, 'pdf')
        res_path = os.path.join(pdf_dev_path, 'mineru')
        # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b"); the old
        # split('.')[0] produced "a" and pointed the CLI at a missing file.
        # NOTE(review): assumes the CLI names its output folder after the
        # full file stem — confirm.
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_path)
            if pdf_file.endswith('.pdf')
        ]
        for demo_name in demo_names:
            common.delete_file(res_path)
            cmd = 'magic-pdf -p %s -o %s -m %s' % (os.path.join(
                pdf_path, f'{demo_name}.pdf'), res_path, 'ocr')
            logging.info(cmd)
            os.system(cmd)
            common.cli_count_folders_and_check_contents(
                os.path.join(res_path, demo_name, 'ocr'))
yyy's avatar
yyy committed
244
245
    
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
246
247
248
    @pytest.mark.P1
    def test_pdf_dev_cli_local_jsonl_txt(self):
        """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``txt``.

        The command is logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, "txt")
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)

yyy's avatar
yyy committed
255
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
256
257
258
    @pytest.mark.P1
    def test_pdf_dev_cli_local_jsonl_ocr(self):
        """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``ocr``.

        The command is logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, 'ocr')
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)

yyy's avatar
yyy committed
265
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
266
267
268
    @pytest.mark.P1
    def test_pdf_dev_cli_local_jsonl_auto(self):
        """Invoke ``magic-pdf-dev`` on ``line1.jsonl`` with method ``auto``.

        The command is logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, 'auto')
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)
yyy's avatar
yyy committed
274
275
    
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
276
277
278
    @pytest.mark.P1
    def test_pdf_dev_cli_s3_jsonl_txt(self):
        """Invoke ``magic-pdf-dev`` for the s3 jsonl case with method ``txt``.

        The input is ``line1.jsonl`` under ``pdf_dev_path``. The command is
        logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, "txt")
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)

yyy's avatar
yyy committed
285
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
286
287
288
    @pytest.mark.P1
    def test_pdf_dev_cli_s3_jsonl_ocr(self):
        """Invoke ``magic-pdf-dev`` for the s3 jsonl case with method ``ocr``.

        The input is ``line1.jsonl`` under ``pdf_dev_path``. The command is
        logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, 'ocr')
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)

yyy's avatar
yyy committed
295
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
296
297
298
    @pytest.mark.P1
    def test_pdf_dev_cli_s3_jsonl_auto(self):
        """Invoke ``magic-pdf-dev`` for the s3 jsonl case with method ``auto``.

        The input is ``line1.jsonl`` under ``pdf_dev_path``. The command is
        logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        jsonl_file = os.path.join(pdf_dev_path, 'line1.jsonl')
        cli_args = (jsonl_file, 'auto')
        command = 'magic-pdf-dev --jsonl %s --method %s' % cli_args
        logging.info(command)
        os.system(command)

quyuan's avatar
quyuan committed
305
306
307
    @pytest.mark.P1
    def test_pdf_dev_cli_pdf_json_auto(self):
        """Invoke ``magic-pdf-dev`` with a PDF plus model json, method ``auto``.

        The command is logged and executed; its exit status is not checked.
        """
        # NOTE(review): the other magic-pdf-dev tests in this class are
        # skipped as 'out-of-date api'; this one is not — confirm intent.
        time.sleep(2)
        model_json = os.path.join(pdf_dev_path, 'test_model.json')
        source_pdf = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
        cli_args = (source_pdf, model_json, 'auto')
        command = 'magic-pdf-dev --pdf %s --json %s --method %s' % cli_args
        logging.info(command)
        os.system(command)
yyy's avatar
yyy committed
314
315
   
    @pytest.mark.skip(reason='out-of-date api')
quyuan's avatar
quyuan committed
316
317
318
    @pytest.mark.P1
    def test_pdf_dev_cli_pdf_json_ocr(self):
        """Invoke ``magic-pdf-dev`` with a PDF plus model json, method ``ocr``.

        The command is logged and executed; its exit status is not checked.
        """
        time.sleep(2)
        json_path = os.path.join(pdf_dev_path, 'test_model.json')
        pdf_path = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
        # Fixed copy-paste error: this test previously passed 'auto', making
        # it a duplicate of test_pdf_dev_cli_pdf_json_auto.
        cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'ocr')
        logging.info(cmd)
        os.system(cmd)
dt-yy's avatar
dt-yy committed
325
    
quyuan's avatar
quyuan committed
326

yyy's avatar
yyy committed
327
328
329
330
    @pytest.mark.P1
    def test_local_magic_pdf_open_st_table(self):
        """Run the CLI with the ``struct_eqtable`` table model enabled.

        Writes the ``table-config`` entry, converts ``test_rearch_report.pdf``
        into ``pdf_res_path`` and asserts that
        ``common.check_html_table_exists`` returns ``True`` for the generated
        markdown file.
        """
        time.sleep(2)
        table_config = {"model": "struct_eqtable", "enable": True, "max_time": 400}
        common.update_config_file(magic_pdf_config, "table-config", table_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        markdown_file = os.path.join(
            pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
        assert common.check_html_table_exists(markdown_file) is True
  
    @pytest.mark.P1
quyuan's avatar
quyuan committed
346
347
    def test_local_magic_pdf_open_tablemaster_cuda(self):
        """Run the CLI with the ``tablemaster`` table model enabled.

        Writes the ``table-config`` entry, converts ``test_rearch_report.pdf``
        into ``pdf_res_path`` and asserts that
        ``common.check_html_table_exists`` returns ``True`` for the generated
        markdown file.
        """
        time.sleep(2)
        table_config = {"model": "tablemaster", "enable": True, "max_time": 400}
        common.update_config_file(magic_pdf_config, "table-config", table_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        markdown_file = os.path.join(
            pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
        assert common.check_html_table_exists(markdown_file) is True
    
    @pytest.mark.P1
quyuan's avatar
quyuan committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
    def test_local_magic_pdf_open_rapidai_table(self):
        """Run the CLI with the ``rapid_table`` table model enabled.

        Writes the ``table-config`` entry, converts ``test_rearch_report.pdf``
        into ``pdf_res_path`` and asserts that
        ``common.check_html_table_exists`` returns ``True`` for the generated
        markdown file.
        """
        time.sleep(2)
        table_config = {"model": "rapid_table", "enable": True, "max_time": 400}
        common.update_config_file(magic_pdf_config, "table-config", table_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        markdown_file = os.path.join(
            pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
        assert common.check_html_table_exists(markdown_file) is True
    
    
    @pytest.mark.P1
    def test_local_magic_pdf_doclayout_yolo(self):
        """Run the CLI with the ``doclayout_yolo`` layout model selected.

        Writes the ``layout-config`` entry, converts
        ``test_rearch_report.pdf`` into ``pdf_res_path`` and hands the
        ``auto`` output folder to
        ``common.cli_count_folders_and_check_contents``.
        """
        time.sleep(2)
        layout_config = {"model": "doclayout_yolo"}
        common.update_config_file(magic_pdf_config, "layout-config", layout_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        output_dir = os.path.join(pdf_res_path, "test_rearch_report", "auto")
        common.cli_count_folders_and_check_contents(output_dir)

    @pytest.mark.P1
    def test_local_magic_pdf_layoutlmv3_yolo(self):
        """Run the CLI with the ``layoutlmv3`` layout model selected.

        Writes the ``layout-config`` entry, converts
        ``test_rearch_report.pdf`` into ``pdf_res_path`` and hands the
        ``auto`` output folder to
        ``common.cli_count_folders_and_check_contents``.
        """
        time.sleep(2)
        layout_config = {"model": "layoutlmv3"}
        common.update_config_file(magic_pdf_config, "layout-config", layout_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        output_dir = os.path.join(pdf_res_path, "test_rearch_report", "auto")
        common.cli_count_folders_and_check_contents(output_dir)

    @pytest.mark.P1
quyuan's avatar
quyuan committed
416
417
    def test_magic_pdf_cpu(self):
        """Run the CLI with ``device-mode`` set to ``cpu``.

        Writes a ``table-config`` entry with ``enable`` set to ``False``,
        switches ``device-mode`` to ``"cpu"``, converts
        ``test_rearch_report.pdf`` into ``pdf_res_path`` and hands the
        ``auto`` output folder to
        ``common.cli_count_folders_and_check_contents``.
        """
        time.sleep(2)
        table_config = {"model": "tablemaster", "enable": False, "max_time": 400}
        common.update_config_file(magic_pdf_config, "table-config", table_config)
        common.update_config_file(magic_pdf_config, "device-mode", "cpu")

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        output_dir = os.path.join(pdf_res_path, "test_rearch_report", "auto")
        common.cli_count_folders_and_check_contents(output_dir)
yyy's avatar
yyy committed
433

quyuan's avatar
quyuan committed
434

yyy's avatar
yyy committed
435
436
437
438
    @pytest.mark.P1
    def test_local_magic_pdf_close_html_table(self):
        """Run the CLI with the table model disabled.

        Writes a ``table-config`` entry with ``enable`` set to ``False``,
        converts ``test_rearch_report.pdf`` into ``pdf_res_path`` and asserts
        that ``common.check_close_tables`` returns ``True`` for the generated
        markdown file.
        """
        time.sleep(2)
        table_config = {"model": "tablemaster", "enable": False, "max_time": 400}
        common.update_config_file(magic_pdf_config, "table-config", table_config)

        source_pdf = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
        common.delete_file(pdf_res_path)
        os.system("magic-pdf -p %s -o %s" % (source_pdf, pdf_res_path))

        markdown_file = os.path.join(
            pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
        assert common.check_close_tables(markdown_file) is True
    
quyuan's avatar
quyuan committed
454

yyy's avatar
yyy committed
455
 
yyy's avatar
yyy committed
456
457
# When the module is executed directly, hand control to pytest's entry point.
if __name__ == '__main__':
    pytest.main()
dt-yy's avatar
dt-yy committed
458