Unverified Commit e7c8555d authored by Woosuk Kwon, committed by GitHub

Bump up transformers version & Remove MistralConfig (#1254)

parent ec3b5ce9
requirements.txt
@@ -6,8 +6,8 @@ pyarrow # Required for Ray data.
 sentencepiece # Required for LLaMA tokenizer.
 numpy
 torch == 2.0.1
-transformers >= 4.33.1 # Required for Code Llama.
-xformers == 0.0.22
+transformers >= 4.34.0 # Required for Mistral.
+xformers == 0.0.22 # Required for Mistral.
 fastapi
 uvicorn[standard]
 pydantic < 2 # Required for OpenAI server.
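The version floor moves from 4.33.1 to 4.34.0 because that is the first transformers release that ships Mistral support (MistralConfig / MistralForCausalLM) natively. A minimal pre-flight check sketch, assuming the packaging library is installed; it is not part of this commit:

    # Hypothetical pre-flight check, not part of this commit.
    from packaging import version
    import transformers

    # transformers >= 4.34.0 is the first release that bundles MistralConfig,
    # which is why the pinned floor moves up from 4.33.1.
    assert version.parse(transformers.__version__) >= version.parse("4.34.0"), (
        "Mistral support requires transformers >= 4.34.0")
    from transformers import MistralConfig  # importable from upstream from 4.34.0 on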
vllm/model_executor/models/__init__.py
@@ -9,10 +9,10 @@ from vllm.model_executor.models.gpt_j import GPTJForCausalLM
 from vllm.model_executor.models.gpt_neox import GPTNeoXForCausalLM
 from vllm.model_executor.models.internlm import InternLMForCausalLM
 from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.model_executor.models.mistral import MistralForCausalLM
 from vllm.model_executor.models.mpt import MPTForCausalLM
 from vllm.model_executor.models.opt import OPTForCausalLM
 from vllm.model_executor.models.qwen import QWenLMHeadModel
-from vllm.model_executor.models.mistral import MistralForCausalLM

 __all__ = [
     "AquilaForCausalLM",
 ...
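This hunk only moves the MistralForCausalLM import into alphabetical order; the model itself was registered in an earlier commit. With the model exported from the registry and the upstream config in place, a Mistral checkpoint can be served through vLLM's usual entry point. A hypothetical usage sketch (model name and prompt are illustrative; the LLM API is unchanged by this commit):

    # Hypothetical usage once this commit and transformers >= 4.34.0 are installed.
    from vllm import LLM, SamplingParams

    llm = LLM(model="mistralai/Mistral-7B-v0.1")
    outputs = llm.generate(
        ["Explain sliding-window attention in one sentence."],
        SamplingParams(temperature=0.8, max_tokens=64))
    print(outputs[0].outputs[0].text)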
vllm/model_executor/models/mistral.py
@@ -29,6 +29,7 @@ from typing import List, Optional, Tuple
 import torch
 from torch import nn
+from transformers import MistralConfig

 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.layers.activation import SiluAndMul
@@ -44,7 +45,6 @@ from vllm.model_executor.weight_utils import (
     convert_pyslice_to_tensor, hf_model_weights_iterator,
     load_tensor_parallel_weights, load_padded_tensor_parallel_vocab)
 from vllm.sequence import SamplerOutput
-from vllm.transformers_utils.configs.mistral import MistralConfig

 KVCache = Tuple[torch.Tensor, torch.Tensor]
 ...
vllm/transformers_utils/config.py
@@ -17,15 +17,6 @@ _CONFIG_REGISTRY = {
 def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None) -> PretrainedConfig:
-    # NOTE: Because the Mistral model in HF hub does not have
-    # `configuration_mistral.py`, we cannot use `AutoConfig` to load the
-    # config. Instead, we use `MistralConfig` directly.
-    # NOTE: This is a hack. This does not work for local models.
-    # FIXME: Remove this once the Mistral model is available in the stable
-    # version of HF transformers.
-    if "mistral" in model.lower():
-        return MistralConfig.from_pretrained(model, revision=revision)
     try:
         config = AutoConfig.from_pretrained(
             model, trust_remote_code=trust_remote_code, revision=revision)
 ...
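With transformers >= 4.34.0 the Mistral architecture is registered with AutoConfig, so the FIXME above is resolved and the special case can go: the generic path now handles Mistral checkpoints, including local ones. A rough sketch of the behaviour get_config relies on after this change (the model name is only illustrative):

    # Sketch of the generic path get_config now takes for Mistral checkpoints.
    from transformers import AutoConfig, MistralConfig

    config = AutoConfig.from_pretrained("mistralai/Mistral-7B-v0.1")
    # AutoConfig dispatches on the checkpoint's `model_type: "mistral"` and
    # returns the MistralConfig class bundled with transformers >= 4.34.0.
    assert isinstance(config, MistralConfig)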
vllm/transformers_utils/configs/__init__.py
@@ -6,7 +6,6 @@ from vllm.transformers_utils.configs.qwen import QWenConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
-from vllm.transformers_utils.configs.mistral import MistralConfig

 __all__ = [
     "MPTConfig",
@@ -14,5 +13,4 @@ __all__ = [
     "AquilaConfig",
     "QWenConfig",
     "RWConfig",
-    "MistralConfig",
 ]
vllm/transformers_utils/configs/mistral.py (deleted)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mistral-7B-v0.1 configuration"""
from transformers.configuration_utils import PretrainedConfig


class MistralConfig(PretrainedConfig):
    model_type = "mistral"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=14336,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        max_position_embeddings=4096 * 32,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        sliding_window=4096,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.sliding_window = sliding_window

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
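The vendored configuration above is removed because transformers now ships its own MistralConfig. A brief sketch of the drop-in replacement, assuming the upstream class carries the same Mistral-7B-v0.1 defaults the vendored copy hard-coded:

    # Illustrative only: the upstream class is assumed to carry the same
    # Mistral-7B-v0.1 defaults the vendored copy hard-coded
    # (e.g. sliding_window=4096, num_key_value_heads=8, rope_theta=10000.0).
    from transformers import MistralConfig

    config = MistralConfig()
    print(config.model_type, config.sliding_window, config.num_key_value_heads)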