Commit 673d30b8 authored by Raushan Turganbay, committed by GitHub

Chameleon: minor fixes after shipping (#32037)

* fix merging

* make chameleon conditional
parent 765732e9
@@ -64,13 +64,13 @@ The original code can be found [here](https://github.com/facebookresearch/chamel
 Here's how to load the model and perform inference in half-precision (`torch.float16`):
 
 ```python
-from transformers import ChameleonProcessor, ChameleonForCausalLM
+from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
 import torch
 from PIL import Image
 import requests
 
 processor = ChameleonProcessor.from_pretrained("meta-chameleon")
-model = ChameleonForCausalLM.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto")
+model = ChameleonForConditionalGeneration.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto")
 
 # prepare image and text prompt
 url = "https://bjiujitsu.com/wp-content/uploads/2021/01/jiu_jitsu_belt_white_1.jpg"
@@ -89,13 +89,13 @@ print(processor.decode(output[0], skip_special_tokens=True))
 Chameleon can perform inference with multiple images as input, where images either belong to the same prompt or different prompts (in batched inference). Here is how you can do it:
 
 ```python
-from transformers import ChameleonProcessor, ChameleonForCausalLM
+from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
 import torch
 from PIL import Image
 import requests
 
 processor = ChameleonProcessor.from_pretrained("meta-chameleon")
-model = ChameleonForCausalLM.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto")
+model = ChameleonForConditionalGeneration.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto")
 
 # Get three different images
 url = "https://www.ilankelman.org/stopsigns/australia.jpg"
@@ -129,7 +129,7 @@ processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokeniza
 The model can be loaded in 8 or 4 bits, greatly reducing the memory requirements while maintaining the performance of the original model. First make sure to install bitsandbytes, `pip install bitsandbytes` and make sure to have access to a CUDA compatible GPU device. Simply change the snippet above with:
 
 ```python
-from transformers import ChameleonForCausalLM, BitsAndBytesConfig
+from transformers import ChameleonForConditionalGeneration, BitsAndBytesConfig
 
 # specify how to quantize the model
 quantization_config = BitsAndBytesConfig(
@@ -138,7 +138,7 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16,
 )
 
-model = ChameleonForCausalLM.from_pretrained("meta-chameleon", quantization_config=quantization_config, device_map="auto")
+model = ChameleonForConditionalGeneration.from_pretrained("meta-chameleon", quantization_config=quantization_config, device_map="auto")
 ```
 
 ### Use Flash-Attention 2 and SDPA to further speed-up generation
@@ -146,9 +146,9 @@ model = ChameleonForCausalLM.from_pretrained("meta-chameleon", quantization_conf
 The models supports both, Flash-Attention 2 and PyTorch's [`torch.nn.functional.scaled_dot_product_attention`](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) which can be enables for optimization. SDPA is the default options when you load the model, If you want to switch for Flash Attention 2, first make sure to install flash-attn. Refer to the [original repository](https://github.com/Dao-AILab/flash-attention) regarding that package installation. Simply change the snippet above with:
 
 ```python
-from transformers import ChameleonForCausalLM
+from transformers import ChameleonForConditionalGeneration
 
-model = ChameleonForCausalLM.from_pretrained(
+model = ChameleonForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
@@ -183,7 +183,7 @@ model = ChameleonForCausalLM.from_pretrained(
 [[autodoc]] ChameleonModel
     - forward
 
-## ChameleonForCausalLM
+## ChameleonForConditionalGeneration
 
-[[autodoc]] ChameleonForCausalLM
+[[autodoc]] ChameleonForConditionalGeneration
     - forward
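
The docs changes above are purely the class rename: `ChameleonForCausalLM` becomes `ChameleonForConditionalGeneration`, in line with other image-text models such as BLIP-2 and LLaVA. A minimal migration sketch, assuming the public `facebook/chameleon-7b` checkpoint referenced elsewhere in this diff (the call signature itself is unchanged):

```python
# Hedged sketch: after this commit only the class name changes.
from transformers import ChameleonProcessor, ChameleonForConditionalGeneration  # formerly ChameleonForCausalLM
import torch

processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
model = ChameleonForConditionalGeneration.from_pretrained(
    "facebook/chameleon-7b", torch_dtype=torch.bfloat16, device_map="auto"
)
```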
@@ -1616,7 +1616,7 @@ else:
     )
     _import_structure["models.chameleon"].extend(
         [
-            "ChameleonForCausalLM",
+            "ChameleonForConditionalGeneration",
             "ChameleonModel",
             "ChameleonPreTrainedModel",
             "ChameleonProcessor",
@@ -6276,7 +6276,7 @@ if TYPE_CHECKING:
         load_tf_weights_in_canine,
     )
     from .models.chameleon import (
-        ChameleonForCausalLM,
+        ChameleonForConditionalGeneration,
         ChameleonModel,
         ChameleonPreTrainedModel,
         ChameleonProcessor,
...
@@ -446,7 +446,6 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
         ("blenderbot-small", "BlenderbotSmallForCausalLM"),
         ("bloom", "BloomForCausalLM"),
         ("camembert", "CamembertForCausalLM"),
-        ("chameleon", "ChameleonForCausalLM"),
         ("code_llama", "LlamaForCausalLM"),
         ("codegen", "CodeGenForCausalLM"),
         ("cohere", "CohereForCausalLM"),
@@ -703,6 +702,7 @@ MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
     [
         ("blip", "BlipForConditionalGeneration"),
         ("blip-2", "Blip2ForConditionalGeneration"),
+        ("chameleon", "ChameleonForConditionalGeneration"),
         ("git", "GitForCausalLM"),
         ("idefics2", "Idefics2ForConditionalGeneration"),
         ("instructblip", "InstructBlipForConditionalGeneration"),
...
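
Note the auto-mapping move above: Chameleon leaves `MODEL_FOR_CAUSAL_LM_MAPPING_NAMES` and joins `MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES`, so the image-to-text auto class should now resolve it instead of `AutoModelForCausalLM`. A hedged sketch of the expected behavior after this patch (not part of the diff):

```python
# Assumes the "facebook/chameleon-7b" checkpoint, whose config has model_type="chameleon".
from transformers import AutoModelForVision2Seq

model = AutoModelForVision2Seq.from_pretrained("facebook/chameleon-7b")
print(type(model).__name__)  # expected: ChameleonForConditionalGeneration
```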
@@ -36,7 +36,7 @@ except OptionalDependencyNotAvailable:
     pass
 else:
     _import_structure["modeling_chameleon"] = [
-        "ChameleonForCausalLM",
+        "ChameleonForConditionalGeneration",
         "ChameleonModel",
         "ChameleonPreTrainedModel",
         "ChameleonVQVAE",
@@ -62,7 +62,7 @@ if TYPE_CHECKING:
         pass
     else:
         from .modeling_chameleon import (
-            ChameleonForCausalLM,
+            ChameleonForConditionalGeneration,
             ChameleonModel,
             ChameleonPreTrainedModel,
             ChameleonVQVAE,
...
@@ -1279,7 +1279,8 @@ class ChameleonModel(ChameleonPreTrainedModel):
         if pixel_values is not None:
             image_tokens = self.get_image_tokens(pixel_values)
             special_image_mask = input_ids == self.vocabulary_mapping.image_token_id
-            input_ids[special_image_mask] = image_tokens.flatten().to(input_ids.device, input_ids.dtype)
+            image_tokens = image_tokens.to(input_ids.device, input_ids.dtype)
+            input_ids = input_ids.masked_scatter(special_image_mask, image_tokens)
 
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
@@ -1445,7 +1446,7 @@ class ChameleonModel(ChameleonPreTrainedModel):
     "Chameleon Model with a head on top used for outputting logits for next token prediction.",
     CHAMELEON_START_DOCSTRING,
 )
-class ChameleonForCausalLM(ChameleonPreTrainedModel):
+class ChameleonForConditionalGeneration(ChameleonPreTrainedModel):
     _tied_weights_keys = ["lm_head.weight"]
 
     def __init__(self, config):
@@ -1504,12 +1505,12 @@ class ChameleonForCausalLM(ChameleonPreTrainedModel):
         Example:
 
         ```python
-        >>> from transformers import ChameleonProcessor, ChameleonForCausalLM
+        >>> from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
         >>> import torch
         >>> import requests
         >>> from PIL import Image
 
-        >>> model = ChameleonForCausalLM.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16)
+        >>> model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16)
         >>> processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
 
         >>> prompt = "I used to know a lot about constellations when I was younger, but as I grew older, I forgot most of what I knew. These are the only two constellations that I really remember now.<image><image>I would like for you to tell me about 3 more constellations and give me a little bit of history about the constellation."
...
@@ -1835,7 +1835,7 @@ def load_tf_weights_in_canine(*args, **kwargs):
     requires_backends(load_tf_weights_in_canine, ["torch"])
 
 
-class ChameleonForCausalLM(metaclass=DummyObject):
+class ChameleonForConditionalGeneration(metaclass=DummyObject):
     _backends = ["torch"]
 
     def __init__(self, *args, **kwargs):
...
@@ -44,7 +44,7 @@ if is_torch_available():
     import torch
 
     from transformers import (
-        ChameleonForCausalLM,
+        ChameleonForConditionalGeneration,
         ChameleonModel,
         ChameleonProcessor,
     )
@@ -191,7 +191,7 @@ class ChameleonModelTester:
         encoder_hidden_states,
         encoder_attention_mask,
     ):
-        model = ChameleonForCausalLM(config=config)
+        model = ChameleonForConditionalGeneration(config=config)
         model.to(torch_device)
         model.eval()
         result = model(input_ids, attention_mask=input_mask, labels=token_labels)
@@ -209,7 +209,7 @@ class ChameleonModelTester:
         encoder_attention_mask,
     ):
         config.is_decoder = True
-        model = ChameleonForCausalLM(config=config)
+        model = ChameleonForConditionalGeneration(config=config)
         model.to(torch_device)
         model.eval()
@@ -273,12 +273,12 @@ class ChameleonModelTester:
 @require_torch
 class ChameleonModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
-    all_model_classes = (ChameleonModel, ChameleonForCausalLM) if is_torch_available() else ()
-    all_generative_model_classes = (ChameleonForCausalLM,) if is_torch_available() else ()
+    all_model_classes = (ChameleonModel, ChameleonForConditionalGeneration) if is_torch_available() else ()
+    all_generative_model_classes = (ChameleonForConditionalGeneration,) if is_torch_available() else ()
     pipeline_model_mapping = (
         {
             "feature-extraction": ChameleonModel,
-            "text-generation": ChameleonForCausalLM,
+            "text-generation": ChameleonForConditionalGeneration,
         }
         if is_torch_available()
         else {}
@@ -339,7 +339,7 @@ class ChameleonModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
         """
         Overwritting the common test as the test is flaky on tiny models
         """
-        model = ChameleonForCausalLM.from_pretrained(
+        model = ChameleonForConditionalGeneration.from_pretrained(
             "facebook/chameleon-7b",
             load_in_4bit=True,
             device_map={"": 0},
@@ -355,7 +355,7 @@ class ChameleonModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
         output_native = model.generate(**inputs, max_new_tokens=20, do_sample=False)
         output_native = processor.tokenizer.batch_decode(output_native)
 
-        model = ChameleonForCausalLM.from_pretrained(
+        model = ChameleonForConditionalGeneration.from_pretrained(
             "facebook/chameleon-7b",
             load_in_4bit=True,
             attn_implementation="flash_attention_2",
@@ -377,7 +377,9 @@ class ChameleonIntegrationTest(unittest.TestCase):
     @require_bitsandbytes
     @require_read_token
     def test_model_7b(self):
-        model = ChameleonForCausalLM.from_pretrained("facebook/chameleon-7b", load_in_4bit=True, device_map="auto")
+        model = ChameleonForConditionalGeneration.from_pretrained(
+            "facebook/chameleon-7b", load_in_4bit=True, device_map="auto"
+        )
         processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
 
         image = Image.open(
@@ -397,7 +399,9 @@ class ChameleonIntegrationTest(unittest.TestCase):
     @require_bitsandbytes
     @require_read_token
     def test_model_7b_batched(self):
-        model = ChameleonForCausalLM.from_pretrained("facebook/chameleon-7b", load_in_4bit=True, device_map="auto")
+        model = ChameleonForConditionalGeneration.from_pretrained(
+            "facebook/chameleon-7b", load_in_4bit=True, device_map="auto"
+        )
         processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
 
         image = Image.open(
@@ -428,7 +432,9 @@ class ChameleonIntegrationTest(unittest.TestCase):
     @require_bitsandbytes
     @require_read_token
     def test_model_7b_multi_image(self):
-        model = ChameleonForCausalLM.from_pretrained("facebook/chameleon-7b", load_in_4bit=True, device_map="auto")
+        model = ChameleonForConditionalGeneration.from_pretrained(
+            "facebook/chameleon-7b", load_in_4bit=True, device_map="auto"
+        )
         processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
 
         image = Image.open(
...