Unverified commit cb3f30c6 authored by Woosuk Kwon and committed by GitHub
Browse files

Upgrade transformers version to 4.36.0 (#2046)

parent f3e024be
......@@ -10,7 +10,7 @@ numpy
tokenizers>=0.15.0
huggingface_hub<0.18,>=0.16.4
einops # Required for phi-1_5
transformers >= 4.34.0 # Required for Mistral.
transformers >= 4.36.0 # Required for Mixtral.
fastapi
uvicorn[standard]
pydantic == 1.10.13 # Required for OpenAI server.
......
......@@ -7,7 +7,7 @@ sentencepiece # Required for LLaMA tokenizer.
numpy
einops # Required for phi-1_5
torch >= 2.1.1
transformers >= 4.34.0 # Required for Mistral.
transformers >= 4.36.0 # Required for Mixtral.
xformers >= 0.0.23 # Required for CUDA 12.1.
fastapi
uvicorn[standard]
......
......@@ -29,7 +29,7 @@ import torch
import torch.nn.functional as F
from torch import nn
from transformers import MistralConfig
from transformers import MixtralConfig
try:
import megablocks.ops as ops
......@@ -395,7 +395,7 @@ class MixtralDecoderLayer(nn.Module):
def __init__(
self,
config: MistralConfig,
config: MixtralConfig,
) -> None:
super().__init__()
self.hidden_size = config.hidden_size
......@@ -443,7 +443,7 @@ class MixtralForCausalLM(nn.Module):
def __init__(
self,
config: MistralConfig,
config: MixtralConfig,
linear_method: Optional[LinearMethodBase] = None,
) -> None:
super().__init__()
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.