Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
InstructBLIP_pytorch
Commits
18dd8da6
Commit
18dd8da6
authored
Sep 14, 2024
by
dongchy920
Browse files
Update projects/instructblip/run_demo.py, README.md, scienceqa_data_preprocess.py files
parent
e25524e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
8 deletions
+19
-8
README.md
README.md
+12
-1
projects/instructblip/run_demo.py
projects/instructblip/run_demo.py
+1
-1
scienceqa_data_preprocess.py
scienceqa_data_preprocess.py
+6
-6
No files found.
README.md
View file @
18dd8da6
...
...
@@ -71,11 +71,22 @@ export HF_ENDPOINT=https://hf-mirror.com
SCNet快速下载链接:
-
http://113.200.138.88:18080/aimodels/findsource-dependency/vicuna-13b-v1.1
数据集结构为:
input
├── scienceqa
│ ├── images
│ │ ├── train
│ │ ├── test
│ │ ├── val
│ ├── scienceqa_problems_path.json
│ ├── scienceqa_pid_splits.json
对ScienceQA进行预处理:
```
# 修改预处理代码中scienceqa数据
集解压位置
# 修改预处理代码中scienceqa数据
路径为数据解压位置,上面的数据结构将数据解压至input文件夹中
python scienceqa_data_preprocess.py
```
运行该命令后会在/input/scienceqa/目录下生成scienceqa_train.json、scienceqa_test.json、scienceqa_val.json文件
该命令将scienceQA转为指令微调数据集,指令格式为:
```
<Image> Context: { {hint} {lecture} } Question: { {question} } Options: { {choices} } Answer: (a) { {answer} }
...
...
projects/instructblip/run_demo.py
View file @
18dd8da6
...
...
@@ -116,4 +116,4 @@ if __name__ == '__main__':
inputs
=
[
image_input
,
prompt_textbox
,
min_len
,
max_len
,
beam_size
,
len_penalty
,
repetition_penalty
,
top_p
,
sampling
],
outputs
=
"text"
,
allow_flagging
=
"never"
,
).
launch
()
).
launch
(
share
=
True
,
server_name
=
"0.0.0.0"
)
scienceqa_data_preprocess.py
View file @
18dd8da6
import
json
from
tqdm
import
tqdm
with
open
(
"scienceqa_problems_path.json"
,
'r'
)
as
file
:
with
open
(
"
/input/scienceqa/
scienceqa_problems_path.json"
,
'r'
)
as
file
:
data
=
json
.
load
(
file
)
with
open
(
"scienceqa_pid_splits.json"
)
as
file
:
with
open
(
"
/input/scienceqa/
scienceqa_pid_splits.json"
)
as
file
:
pid_splits
=
json
.
load
(
file
)
train_ids
=
pid_splits
[
'train'
]
...
...
@@ -18,7 +18,7 @@ for id in tqdm(train_ids):
train_data
=
data
[
str
(
id
)]
if
train_data
[
'image'
]
is
None
:
continue
image_url
=
f
"scienceqa/images/train/
{
id
}
/image.png"
image_url
=
f
"
/input/
scienceqa/images/train/
{
id
}
/image.png"
if
train_data
[
'answer'
]
==
0
:
answer
=
"(a) "
+
train_data
[
'choices'
][
train_data
[
'answer'
]]
elif
train_data
[
'answer'
]
==
1
:
...
...
@@ -46,7 +46,7 @@ for id in tqdm(val_ids):
val_data
=
data
[
str
(
id
)]
if
val_data
[
'image'
]
is
None
:
continue
image_url
=
f
"scienceqa/images/val/
{
id
}
/image.png"
image_url
=
f
"
/input/
scienceqa/images/val/
{
id
}
/image.png"
if
val_data
[
'answer'
]
==
0
:
answer
=
"(a) "
+
val_data
[
'choices'
][
val_data
[
'answer'
]]
elif
val_data
[
'answer'
]
==
1
:
...
...
@@ -74,7 +74,7 @@ for id in tqdm(test_ids):
test_data
=
data
[
str
(
id
)]
if
test_data
[
'image'
]
is
None
:
continue
image_url
=
f
"scienceqa/images/test/
{
id
}
/image.png"
image_url
=
f
"
/input/
scienceqa/images/test/
{
id
}
/image.png"
if
test_data
[
'answer'
]
==
0
:
answer
=
"(a) "
+
test_data
[
'choices'
][
test_data
[
'answer'
]]
elif
test_data
[
'answer'
]
==
1
:
...
...
@@ -102,4 +102,4 @@ with open("/input/scienceqa/scienceqa_test.json", 'w') as file:
json
.
dump
(
test_annotation
,
file
)
with
open
(
"/input/scienceqa/scienceqa_val.json"
,
'w'
)
as
file
:
json
.
dump
(
val_annotation
,
file
)
\ No newline at end of file
json
.
dump
(
val_annotation
,
file
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment