"docs/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "2815fb1f22e36854463d3ede0a9da4aa7435739a"
Commit 18dd8da6 authored by dongchy920
Browse files

Update projects/instructblip/run_demo.py, README.md, scienceqa_data_preprocess.py files

parent e25524e9
...@@ -71,11 +71,22 @@ export HF_ENDPOINT=https://hf-mirror.com ...@@ -71,11 +71,22 @@ export HF_ENDPOINT=https://hf-mirror.com
SCNet快速下载链接: SCNet快速下载链接:
- http://113.200.138.88:18080/aimodels/findsource-dependency/vicuna-13b-v1.1 - http://113.200.138.88:18080/aimodels/findsource-dependency/vicuna-13b-v1.1
数据集结构为:
input
├── scienceqa
│ ├── images
│ │ ├── train
│ │ ├── test
│ │ ├── val
│ ├── scienceqa_problems_path.json
│ ├── scienceqa_pid_splits.json
对ScienceQA进行预处理: 对ScienceQA进行预处理:
``` ```
# 修改预处理代码中scienceqa数据集解压位置 # 将预处理代码中的scienceqa数据路径修改为数据实际解压位置(按上面的数据结构,数据应解压至/input文件夹中)
python scienceqa_data_preprocess.py python scienceqa_data_preprocess.py
``` ```
运行该命令后会在/input/scienceqa/目录下生成scienceqa_train.json、scienceqa_test.json、scienceqa_val.json文件
该命令将scienceQA转为指令微调数据集,指令格式为: 该命令将scienceQA转为指令微调数据集,指令格式为:
``` ```
<Image> Context: { {hint} {lecture} } Question: { {question} } Options: { {choices} } Answer: (a) { {answer} } <Image> Context: { {hint} {lecture} } Question: { {question} } Options: { {choices} } Answer: (a) { {answer} }
......
...@@ -116,4 +116,4 @@ if __name__ == '__main__': ...@@ -116,4 +116,4 @@ if __name__ == '__main__':
inputs=[image_input, prompt_textbox, min_len, max_len, beam_size, len_penalty, repetition_penalty, top_p, sampling], inputs=[image_input, prompt_textbox, min_len, max_len, beam_size, len_penalty, repetition_penalty, top_p, sampling],
outputs="text", outputs="text",
allow_flagging="never", allow_flagging="never",
).launch() ).launch(share=True, server_name="0.0.0.0")
import json import json
from tqdm import tqdm from tqdm import tqdm
with open("scienceqa_problems_path.json", 'r') as file: with open("/input/scienceqa/scienceqa_problems_path.json", 'r') as file:
data = json.load(file) data = json.load(file)
with open("scienceqa_pid_splits.json") as file: with open("/input/scienceqa/scienceqa_pid_splits.json") as file:
pid_splits = json.load(file) pid_splits = json.load(file)
train_ids = pid_splits['train'] train_ids = pid_splits['train']
...@@ -18,7 +18,7 @@ for id in tqdm(train_ids): ...@@ -18,7 +18,7 @@ for id in tqdm(train_ids):
train_data = data[str(id)] train_data = data[str(id)]
if train_data['image'] is None: if train_data['image'] is None:
continue continue
image_url = f"scienceqa/images/train/{id}/image.png" image_url = f"/input/scienceqa/images/train/{id}/image.png"
if train_data['answer'] == 0: if train_data['answer'] == 0:
answer = "(a) " + train_data['choices'][train_data['answer']] answer = "(a) " + train_data['choices'][train_data['answer']]
elif train_data['answer'] == 1: elif train_data['answer'] == 1:
...@@ -46,7 +46,7 @@ for id in tqdm(val_ids): ...@@ -46,7 +46,7 @@ for id in tqdm(val_ids):
val_data = data[str(id)] val_data = data[str(id)]
if val_data['image'] is None: if val_data['image'] is None:
continue continue
image_url = f"scienceqa/images/val/{id}/image.png" image_url = f"/input/scienceqa/images/val/{id}/image.png"
if val_data['answer'] == 0: if val_data['answer'] == 0:
answer = "(a) " + val_data['choices'][val_data['answer']] answer = "(a) " + val_data['choices'][val_data['answer']]
elif val_data['answer'] == 1: elif val_data['answer'] == 1:
...@@ -74,7 +74,7 @@ for id in tqdm(test_ids): ...@@ -74,7 +74,7 @@ for id in tqdm(test_ids):
test_data = data[str(id)] test_data = data[str(id)]
if test_data['image'] is None: if test_data['image'] is None:
continue continue
image_url = f"scienceqa/images/test/{id}/image.png" image_url = f"/input/scienceqa/images/test/{id}/image.png"
if test_data['answer'] == 0: if test_data['answer'] == 0:
answer = "(a) " + test_data['choices'][test_data['answer']] answer = "(a) " + test_data['choices'][test_data['answer']]
elif test_data['answer'] == 1: elif test_data['answer'] == 1:
...@@ -102,4 +102,4 @@ with open("/input/scienceqa/scienceqa_test.json", 'w') as file: ...@@ -102,4 +102,4 @@ with open("/input/scienceqa/scienceqa_test.json", 'w') as file:
json.dump(test_annotation, file) json.dump(test_annotation, file)
with open("/input/scienceqa/scienceqa_val.json", 'w') as file: with open("/input/scienceqa/scienceqa_val.json", 'w') as file:
json.dump(val_annotation, file) json.dump(val_annotation, file)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment