Unverified Commit 7c2726c2 authored by Yuanhan Zhang, committed by GitHub

[Model] Yhzhang/add mlugowl llamaadapter (#405)



* refine gitignore

* [Feature]: Add minigpt-4

* [Feature]: Add mm local runner

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* [Feature]: Add minigpt-4

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* lint

* update

* lint

* lint

* add __init__.py

* update

* update

* update

* update

* optimize mmbench dataset args

* update

* update

* run commit hook

---------
Co-authored-by: liuyuan <3463423099@qq.com>
Co-authored-by: kennymckormick <dhd@pku.edu.cn>
Co-authored-by: kennymckormick <dhd.efz@gmail.com>
parent 267401bd
@@ -28,18 +28,21 @@ llama_adapter_mmbench_dataloader = dict(batch_size=1,
     sampler=dict(type='DefaultSampler', shuffle=False))
 
 # model settings
-llama_adapter_model = dict(
+llama_adapter_mmbench_model = dict(
     type='LLaMA-adapter-v2',
     llama_dir=  # noqa
     '/llama_adapter_v2_multimodal',
     prompt_constructor=dict(type=LlamaAadapterMMBenchPromptConstructor),
-    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor))
+    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor)
+)
 
 # evaluation settings
-llama_adapter_evaluator = [
+llama_adapter_mmbench_evaluator = [
     dict(
         type='opencompass.DumpResults',
         save_path='work_dirs/llama-adapter-v2-multimodal-mmagibench-v0.1.0.xlsx'
     )
 ]
+
+llama_adapter_mmbench_load_from = None  # noqa
@@ -35,8 +35,8 @@ mplug_owl_mmbench_dataloader = dict(
 
 # model settings
 mplug_owl_mmbench_model = dict(
-    type='mplug_owl-7b',
-    model_path='/mplug-owl-llama-7b-ft',
+    type='mplug_owl_7b',
+    model_path='/mplug-owl-llama-7b-ft/',
     prompt_constructor=dict(type=MplugOwlMMBenchPromptConstructor),
     post_processor=dict(type=MplugOwlMMBenchPostProcessor)
 )  # noqa
@@ -46,3 +46,5 @@ mplug_owl_mmbench_evaluator = [
     dict(type='opencompass.DumpResults',
          save_path='work_dirs/mplug_owl-7b-mmagibench-v0.1.0.xlsx')
 ]
+
+mplug_owl_mmbench_load_from = None
\ No newline at end of file
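The renames above (llama_adapter_model → llama_adapter_mmbench_model, llama_adapter_evaluator → llama_adapter_mmbench_evaluator, type='mplug_owl-7b' → 'mplug_owl_7b' to match the registry name changed later in this diff) give every config symbol a dataset-scoped name. That matters because OpenCompass configs are merged into a single namespace. A hedged sketch of how such fragments are typically aggregated with mmengine's read_base; the imported module names here are illustrative placeholders, not the repository's actual file layout:

# Illustrative top-level config; fragment module names are placeholders.
from mmengine.config import read_base

with read_base():
    # Both fragments merge into this module's namespace, so a shared
    # name like `llama_adapter_model` in two files would collide; the
    # `*_mmbench_*` prefix keeps each model/evaluator pair distinct.
    from .llama_adapter_v2_mmbench import (llama_adapter_mmbench_model,
                                           llama_adapter_mmbench_evaluator)
    from .mplug_owl_mmbench import (mplug_owl_mmbench_model,
                                    mplug_owl_mmbench_evaluator)

models = [llama_adapter_mmbench_model, mplug_owl_mmbench_model]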
@@ -41,12 +41,11 @@ class MMBenchDataset(Dataset):
         return len(self.df)
 
     def __getitem__(self, idx: int) -> dict:
+        # Mandatory Fields Begin
         index = self.df.iloc[idx]['index']
         image = self.df.iloc[idx]['image']
         image = decode_base64_to_image(image)
         question = self.df.iloc[idx]['question']
-        catetory = self.df.iloc[idx]['category']
-        l2_catetory = self.df.iloc[idx]['l2-category']
 
         option_candidate = ['A', 'B', 'C', 'D', 'E']
         options = {
@@ -57,13 +56,19 @@ class MMBenchDataset(Dataset):
         options_prompt = f'{self.sys_prompt}\n'
         for key, item in options.items():
             options_prompt += f'{key}. {item}\n'
+        # Mandatory Fields End
 
+        # Optional Fields Begin
         hint = self.load_from_df(idx, 'hint')
+        category = self.load_from_df(idx, 'category')
+        l2_catetory = self.load_from_df(idx, 'l2-category')
+        # Optional Fields End
 
         data = {
             'img': image,
             'question': question,
             'options': options_prompt,
-            'category': catetory,
+            'category': category,
             'l2-category': l2_catetory,
             'options_dict': options,
             'index': index,
...
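This change moves category and l2-category behind self.load_from_df, so MMBench TSVs that lack those columns still load; only index, image, question, and the options remain mandatory. The helper itself is not part of this diff; a plausible sketch, assuming a pandas DataFrame backs self.df, shown standalone for clarity:

import pandas as pd

def load_from_df(df: pd.DataFrame, idx: int, key: str):
    # Plausible sketch of the load_from_df helper referenced above:
    # return the cell when the column exists and is non-NaN, else None.
    if key in df.columns and not pd.isna(df.iloc[idx][key]):
        return df.iloc[idx][key]
    return None

# A tiny frame without the optional 'l2-category' column:
df = pd.DataFrame({'question': ['Q1'], 'category': ['attribute']})
print(load_from_df(df, 0, 'category'))     # attribute
print(load_from_df(df, 0, 'l2-category'))  # None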
 import json
 import os
+import os.path as osp
+import sys
 from pathlib import Path
 
 import clip
 import mmengine
 import torch
 import torch.nn as nn
-from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs, Transformer
-from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
-from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
 from mmengine.device import get_device
 from timm.models.vision_transformer import Block
 
 from opencompass.registry import MM_MODELS
 
+
+def load_package():
+    """Load required packages from llama_adapter_v2_multimodal7b."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+    sys.path.append(os.path.join(current_folder_path, 'LLaMA-Adapter'))  # noqa
+    from llama_adapter_v2_multimodal7b.llama.llama import (ModelArgs,
+                                                           Transformer)
+    from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
+    from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
+    sys.path.pop(-1)
+    return ModelArgs, Transformer, Tokenizer, sample_top_p
+
+
+ModelArgs, Transformer, Tokenizer, sample_top_p = load_package()
+
 
 class LLaMA_adapter(nn.Module):
 
     def __init__(self,
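load_package() puts the vendored LLaMA-Adapter checkout on sys.path just long enough to import from it, then pops the entry so the path does not leak into unrelated imports. The same idea can be written as a context manager that also restores sys.path when an import raises; this is an alternative sketch, not what the PR itself uses:

import contextlib
import os
import sys


@contextlib.contextmanager
def vendored(path):
    """Temporarily expose a vendored checkout for importing."""
    sys.path.append(path)
    try:
        yield
    finally:
        # Remove the exact entry we appended, even if the import raised.
        sys.path.remove(path)


# Hypothetical usage mirroring load_package():
# with vendored(os.path.join(os.path.dirname(__file__), 'LLaMA-Adapter')):
#     from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p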
@@ -182,7 +199,6 @@ class LLaMA_adapter(nn.Module):
         data_sample = data_samples[0]
-        prompts = [prompts]
 
         imgs = image
         # import pdb;pdb.set_trace()
...
@@ -261,12 +277,14 @@ class LLaMA_adapter_v2(nn.Module):
                  llama_dir,
                  prompt_constructor: dict,
                  post_processor: dict,
+                 model_path: str = 'llama_adapter_v2_multimodal7b',
+                 name: str = 'LORA-BIAS-7B',
                  mode: str = 'generation',
                  device='cuda' if torch.cuda.is_available() else 'cpu',
                  download_root='ckpts'):
         super().__init__()
-        name = 'BIAS-7B'
+        assert name in ['LORA-BIAS-7B', 'BIAS-7B', 'CAPTION-7B']
 
         # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
         llama_type = name.split('.')[0].split('-')[-1]
         llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -274,9 +292,22 @@ class LLaMA_adapter_v2(nn.Module):
 
         # load llama_adapter weights and model_cfg
         print(f'Loading LLaMA-Adapter from {llama_dir}')
-        ckpt = torch.load(
-            f'{llama_dir}/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
-            map_location='cpu')
+        current_file_path = os.path.abspath(__file__)
+        current_folder_path = os.path.dirname(current_file_path)
+        model_path = osp.join(current_folder_path, 'LLaMA-Adapter', model_path)
+        ckpt_root = osp.join(model_path, download_root)
+        ckpt_map = {
+            'LORA-BIAS-7B':
+            '1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth',  # noqa: E501
+            'BIAS-7B':
+            '7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
+            'CAPTION-7B':
+            '5088aeb63a89746b90bcfd5cb819e1c7411b2771b267c6d131ce73e250a8abf0_CAPTION-7B.pth'  # noqa: E501
+        }
+        ckpt = torch.load(osp.join(ckpt_root, ckpt_map[name]),
+                          map_location='cpu')
         model_cfg = ckpt.get('config', {})
 
         self.model = LLaMA_adapter(
...
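With name now a validated constructor argument, the adapter checkpoint is resolved as <module dir>/LLaMA-Adapter/<model_path>/<download_root>/<sha256>_<name>.pth instead of being hard-wired to the BIAS-7B file. A small sketch of that resolution; the folder value below is a placeholder and the hash is truncated for readability:

import os.path as osp

current_folder_path = '/repo/opencompass/multimodal/models/llama_adapter_v2_multimodal'  # placeholder
model_path = 'llama_adapter_v2_multimodal7b'  # constructor default
download_root = 'ckpts'                       # constructor default
name = 'LORA-BIAS-7B'                         # must be a ckpt_map key

ckpt_map = {'LORA-BIAS-7B': '1bcbffc4..._LORA-BIAS-7B.pth'}  # truncated
print(osp.join(current_folder_path, 'LLaMA-Adapter', model_path,
               download_root, ckpt_map[name]))
# /repo/.../LLaMA-Adapter/llama_adapter_v2_multimodal7b/ckpts/1bcbffc4..._LORA-BIAS-7B.pth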
@@ -7,7 +7,7 @@ class LlamaAadapterMMBenchPostProcessor:
 
     def __init__(self) -> None:
         pass
 
-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
         if len(output_token) >= 2:
             if output_token[1] == '.':
...
@@ -49,8 +49,10 @@ class LlamaAadapterMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
+            context = [''] * len(data_samples)
 
-        prompts = context + ' ' + question + ' ' + options  # noqa
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa
 
         return prompts
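The old one-liner assumed context, question, and options were scalar strings, which breaks once they are per-sample lists; the rewrite builds one prompt per sample. A self-contained check of the new behavior with made-up inputs:

# Made-up two-sample batch exercising the zip-based loop above.
question = ['What is shown?', 'Which color is the car?']
options = ['A. cat\nB. dog\n', 'A. red\nB. blue\n']
context = [''] * len(question)  # the fallback when no context exists

prompts = []
for cur_context, cur_question, cur_options in zip(context, question, options):
    prompts.append(cur_context + ' ' + cur_question + ' ' + cur_options)

print(prompts[0])   # " What is shown? A. cat\nB. dog\n"
print(len(prompts))  # 2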
-from .mplug_owl import MplugOwl
+from .mplug_owl_7b import MplugOwl
 from .post_processor import MplugOwlMMBenchPostProcessor
 from .prompt_constructor import MplugOwlMMBenchPromptConstructor  # noqa
...
+import os
+import sys
+
 import mmengine
 import torch
 import torch.nn as nn
 from mmengine.device import get_device
-# Load via Huggingface Style
-from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
-from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
-                                            MplugOwlProcessor)
-from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
 
 from opencompass.registry import MM_MODELS
 
-@MM_MODELS.register_module('mplug_owl')
+
+def load_package():
+    """Load required packages from mPLUG-Owl."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+    sys.path.append(os.path.join(current_folder_path, 'mPLUG-Owl'))  # noqa
+    from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
+    from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
+                                                MplugOwlProcessor)
+    from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
+    sys.path.pop(-1)
+    return (MplugOwlForConditionalGeneration, MplugOwlImageProcessor,
+            MplugOwlProcessor, MplugOwlTokenizer)
+
+
+(MplugOwlForConditionalGeneration, MplugOwlImageProcessor,
+ MplugOwlProcessor, MplugOwlTokenizer) = load_package()
+
+
+@MM_MODELS.register_module('mplug_owl_7b')
 class MplugOwl(nn.Module):
 
     def __init__(self,
                  prompt_constructor: dict,
                  post_processor: dict,
                  model_path='MAGAer13/mplug-owl-llama-7b',
-                 mode: str = 'generation') -> None:
+                 mode: str = 'generation'):
         super().__init__()
         pretrained_ckpt = model_path
         # import pdb;pdb.set_trace()
+        print(pretrained_ckpt)
         self.model = MplugOwlForConditionalGeneration.from_pretrained(
             pretrained_ckpt,
             torch_dtype=torch.bfloat16,
...
@@ -57,7 +75,7 @@ class MplugOwl(nn.Module):
         inputs = {'image': images, 'data_samples': data_samples}
         inputs = self.prompt_constructor(inputs)
         image = inputs['image']
-        prompt = inputs['prompt']
+        prompt = inputs['prompt'][0]
         data_samples = inputs['data_samples']
         data_sample = data_samples[0]
...
@@ -9,7 +9,7 @@ class MplugOwlMMBenchPostProcessor:
 
     def __init__(self) -> None:
         pass
 
-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
         pattern = re.compile(r'([A-Z]\.)')
         res = pattern.findall(output_token)
         if len(res) > 0:
...
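Dropping the unused tokenizer argument, presumably because callers now pass only the decoded string, leaves a post-processor that simply regex-scans the output for the first 'X.' option marker. A quick illustration on a made-up output string:

import re

output_token = 'The answer is B. blue'  # made-up decoded model output
pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall(output_token)
if len(res) > 0:
    print(res[0])  # B.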
@@ -48,8 +48,11 @@ class MplugOwlMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
+            context = [''] * len(data_samples)
 
-        prompts = context + ' ' + question + ' ' + options  # noqa
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa
 
         return prompts
@@ -122,6 +122,7 @@ class MultimodalInferTask:
         dataloader = Runner.build_dataloader(self.dataloader)
 
         # build model
         model = build_model(self.cfg)
+        model.eval()
 
         # build evaluator
         evaluator = Evaluator(self.evaluator)
...
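Calling model.eval() right after build_model switches the freshly built model out of PyTorch's default train mode, so layers such as dropout and batch norm behave deterministically during inference and repeated MMBench runs produce identical predictions. A minimal demonstration with a toy layer (not the task's real model):

import torch
import torch.nn as nn

layer = nn.Dropout(p=0.5)
x = torch.ones(4)

layer.train()    # default state after construction
print(layer(x))  # stochastic: zeros mixed with 2.0 (scaled by 1/(1-p))

layer.eval()     # what MultimodalInferTask now does after build_model
print(layer(x))  # deterministic identity: tensor([1., 1., 1., 1.])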
@@ -103,7 +103,8 @@ def get_config_from_arg(args) -> Config:
             cfg = Config.fromfile(s[1])
             summarizer = cfg['summarizer']
 
-    return Config(dict(models=models, datasets=datasets, summarizer=summarizer),
+    return Config(dict(models=models, datasets=datasets,
+                       summarizer=summarizer),
                   format_python_code=False)
...