# ofa-large_zeroshot_vqa.py: OFA-large zero-shot VQA config (evaluation only).
# Paths of the base config files this config builds on: the COCO VQA dataset
# settings and the default runtime settings.
# NOTE(review): presumably resolved relative to this file and merged by the
# config loader (MMEngine-style `_base_` inheritance) -- confirm.
_base_ = [
    '../_base_/datasets/coco_vqa.py',
    '../_base_/default_runtime.py',
]

# model settings
# OFA model set up for the 'vqa' task, with a ResNet-152 image embedding,
# 12-layer / 16-head encoder and decoder, and the OFA-large tokenizer.
model = {
    'type': 'OFA',
    'task': 'vqa',
    'vocab_size': 59457,
    'embedding_dim': 1024,
    'encoder_cfg': {
        'embed_images': {
            'type': 'OFAResNet',
            'depth': 152,
        },
        'num_layers': 12,
        'num_heads': 16,
    },
    'decoder_cfg': {
        'num_layers': 12,
        'num_heads': 16,
    },
    'generation_cfg': {
        'num_beams': 20,
        'max_new_tokens': 200,
        # VQA doesn't require longer answer.
        'length_penalty': 0.0,
        'use_cache': True,
    },
    'tokenizer': {
        'type': 'OFATokenizer',
        'name_or_path': 'OFA-Sys/OFA-large',
    },
}

# data settings
# Multi-modal preprocessor config: per-channel mean/std of 127.5 with the
# to_rgb flag enabled.
data_preprocessor = {
    'type': 'MultiModalDataPreprocessor',
    'mean': [127.5, 127.5, 127.5],
    'std': [127.5, 127.5, 127.5],
    'to_rgb': True,
}

train_dataloader = None  # Eval only: no training dataloader is configured.

# schedule settings
# No training config is set; the validation and test configs are empty dicts.
train_cfg = None
val_cfg = {}
test_cfg = {}