"docs/vscode:/vscode.git/clone" did not exist on "d1112d8548eb13c842900b3a8d622345f9737759"
Unverified Commit 4419410c authored by MPU王荣胜's avatar MPU王荣胜 Committed by GitHub
Browse files

add data

parent c3a4340d
import xml.etree.ElementTree as ET
import os
import shutil
from tqdm import tqdm
# Copy every image referenced by the report XML files into ./images/, naming
# each copy "<report number>_<position of its <url> in the report>.png".
# shutil.copy does not create missing directories, so make sure the
# destination folder exists before the first copy.
os.makedirs('./images', exist_ok=True)
for i in tqdm(range(1, 4000)):
    file_path = 'ecgen-radiology/' + str(i) + '.xml'
    # Skip report numbers that have no XML file on disk.
    if not os.path.isfile(file_path):
        continue
    # Parse the XML report.
    root = ET.parse(file_path).getroot()
    # Visit every <url> element in the report; num is its 1-based position.
    for num, url in enumerate(root.findall('.//url'), start=1):
        # The image name is the last path component of the URL text, cut at
        # its first dot.
        filename = str(url.text).split("/")[-1].split(".")[0]
        src_file = './NLMCXR_png/' + filename + '.png'
        dst_file = './images/' + str(i) + '_' + str(num) + '.png'
        shutil.copy(src_file, dst_file)
\ No newline at end of file
import json
from tqdm import tqdm
# Dump every caption from openi-en.json into a Markdown file, one caption per
# paragraph.
with open('./openi-en.json') as src:
    data = json.load(src)

annotations = data['annotations']
# Each caption is followed by a blank line so it forms its own paragraph.
paragraphs = [str(entry['caption']) + '\n\n' for entry in tqdm(annotations)]
markdown_content = ''.join(paragraphs)

with open('openi-en-md.md', 'w') as out:
    out.write(markdown_content)

# Report how many captions were exported.
print(len(annotations))
\ No newline at end of file
import json
from tqdm import tqdm
# Read data_ch.md and collect its non-empty lines (whitespace-stripped).
# Line k becomes the caption of annotation k.
with open('data_ch.md', 'r', encoding='utf-8') as f:
    data_lines = [line.strip() for line in f if line.strip()]
print(len(data_lines))

# Open the JSON explicitly as UTF-8, the same way data_ch.md is read, instead
# of relying on the platform's default encoding.
with open('./filter_cap.json', encoding='utf-8') as f:
    data = json.load(f)
print(len(data['annotations']))

# Captions are paired with annotations purely by position, so a count
# mismatch would attach captions to the wrong images (or run off the end of
# the list). Stop before modifying anything.
if len(data_lines) != len(data['annotations']):
    raise ValueError(
        'data_ch.md has %d non-empty lines but filter_cap.json has %d '
        'annotations; captions cannot be aligned by position'
        % (len(data_lines), len(data['annotations']))
    )

for annotation, caption in zip(tqdm(data['annotations']), data_lines):
    annotation['caption'] = caption

with open('data_ch.json', 'w', encoding='utf-8') as f1:
    json.dump(data, f1)

# Reload the file that was just written and print its annotation count as a
# sanity check.
with open('./data_ch.json', encoding='utf-8') as f2:
    data2 = json.load(f2)
print(len(data2['annotations']))
\ No newline at end of file
This diff is collapsed.
import json
import openai
import time
from tqdm import tqdm
# Configure the OpenAI API credentials. Prefer the OPENAI_API_KEY environment
# variable so a real key never has to be written into this file; the original
# "xxx" placeholder is kept as the fallback.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "xxx")
# Translation helper.
def translate_text(text):
    """Translate English text to Chinese with the gpt-3.5-turbo chat model.

    Args:
        text: The English text to translate. It is converted with ``str()``
            before being appended to the prompt.

    Returns:
        The message content of the first choice in the chat completion,
        as a ``str``.
    """
    # An alternative Chinese instruction was considered here: "Please
    # translate into Chinese; you may polish the wording as appropriate, but
    # the sentence must read smoothly and keep its original meaning:".
    #
    # Put a colon and a newline between the instruction and the payload;
    # without a separator the two run together ("...to Chinese" immediately
    # followed by the first word of the text).
    prompt = "Translate the following English text to Chinese:\n" + str(text)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "你是一个非常优秀的中英文翻译器。"},
            {"role": "user", "content": prompt},
        ],
    )
    return str(completion.choices[0].message['content'])
# Translate every caption in openi-en.json and save the result as
# openi-zh.json.
with open('./openi-en.json') as src:
    data = json.load(src)

for entry in tqdm(data['annotations']):
    # Overwrite the caption in place with its translation.
    entry['caption'] = str(translate_text(entry['caption']))

# Write the updated annotations out as JSON.
with open('openi-zh.json', 'w') as out:
    json.dump(data, out)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment