Unverified Commit 94aead9e authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Fix dependency (#538)

parent 9c902b19
@@ -21,7 +21,7 @@ dependencies = [

 [project.optional-dependencies]
 srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
-       "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
+       "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"]
 openai = ["openai>=1.0", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]
...
@@ -3,10 +3,10 @@ Faster constrained decoding.

 Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
 """
-import interegular
 import dataclasses
 from collections import defaultdict

+import interegular
 import outlines.caching

 from sglang.srt.constrained import (
     FSMInfo,
...
@@ -5,7 +5,6 @@
 from typing import Iterable, List, Optional, Tuple

 import torch
-from peft import LoraConfig

 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.managers.controller.model_runner import InputMetadata
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
 from vllm.transformers_utils.configs import ChatGLMConfig

+LoraConfig = None
+

 class GLMAttention(nn.Module):
@@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module):

 EntryClass = ChatGLMForCausalLM
 # compat: glm model.config class == ChatGLMModel
 EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment