"mmdet3d/datasets/waymo_dataset.py" did not exist on "62ce67c092a4cb5810326597e3b77e78a9b0f7fb"
download.py 945 Bytes
Newer Older
赵小蒙's avatar
赵小蒙 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import json
import os
import subprocess

from tqdm import tqdm

from libs.commons import join_path

# Download the per-page layout-model outputs for every sample of the OCR
# validation set from S3 into a local directory tree (one folder per PDF).

# JSON text is UTF-8 by specification; do not depend on the locale default.
with open('/mnt/petrelfs/share_data/ouyanglinke/OCR/OCR_validation_dataset.json', 'r', encoding='utf-8') as f:
    samples = json.load(f)

# S3 prefix that holds the model outputs, and the local root they are copied to.
pdf_model_dir = 's3://llm-pdf-text/eval_1k/layout_res/'
save_root_path = '/mnt/petrelfs/share_data/ouyanglinke/OCR/OCR_val_docxchain/'

# Fixed part of the AWS CLI invocation. It is kept as an argument list so the
# command never goes through a shell: PDF names containing spaces or shell
# metacharacters are passed through verbatim instead of being re-parsed.
aws_s3_cp = [
    'aws', '--profile', 'langchao',
    '--endpoint-url=http://10.140.85.161:80',
    's3', 'cp',
]

# NOTE(review): these accumulators are not used anywhere in the visible script;
# they are kept so the module-level names remain available.
labels = []
det_res = []
edit_distance_list = []

# Sources whose copy failed, reported once the loop has finished.
failed_downloads = []

for sample in tqdm(samples):
    pdf_name = sample['pdf_name']
    page_num = sample['page']
    pdf_model_path = join_path(pdf_model_dir, pdf_name)
    # Per the original author's note, page numbers in the model output are 1-based.
    model_output_json = join_path(pdf_model_path, f"page_{page_num}.json")
    save_path = join_path(save_root_path, pdf_name)
    os.makedirs(save_path, exist_ok=True)
    try:
        subprocess.run(aws_s3_cp + [model_output_json, save_path], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        # Stay best-effort like the original loop (one bad page must not stop
        # the rest), but make the failure visible instead of dropping it.
        failed_downloads.append(model_output_json)
        tqdm.write(f"download failed for {model_output_json}: {err}")

if failed_downloads:
    print(f"{len(failed_downloads)} of {len(samples)} downloads failed")