"docs/source/vscode:/vscode.git/clone" did not exist on "32749512f4b799bf889bc59e73d51cac92ab7958"
Unverified commit 45d5af24 authored by sixgod, committed by GitHub

Add GLM-4 TextGeneration Model support for SGLang (#1736)

parent b121bc03
In the supported-models documentation, GLM-4 joins the text-generation list:

```diff
@@ -303,6 +303,7 @@ You can view the full example [here](https://github.com/sgl-project/sglang/tree/
 - MiniCPM / MiniCPM 3
 - XVERSE / XVERSE MoE
 - SmolLM
+- GLM-4
 
 **Embedding Models**
```
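With GLM-4 on the supported list, a checkpoint such as `THUDM/glm-4-9b-chat` loads like any other SGLang text-generation model. A minimal sketch using SGLang's frontend language (the prompt and generation settings below are illustrative, not part of this commit):

```python
import sglang as sgl

# Launch a local SGLang runtime for the newly supported GLM-4 checkpoint
# and make it the default backend for sgl.function programs.
runtime = sgl.Runtime(model_path="THUDM/glm-4-9b-chat")
sgl.set_default_backend(runtime)

@sgl.function
def qa(s, question):
    # Chat-style turns; the runtime formats them with the model's template.
    s += sgl.user(question)
    s += sgl.assistant(sgl.gen("answer", max_tokens=128))

state = qa.run(question="What is GLM-4?")
print(state["answer"])

runtime.shutdown()
```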
In the ChatGLM model implementation, the backbone `nn.Module` is renamed from `ChatGLMModel` to `ChatGLMM`, and the entry-point list is updated to export the alias class:

```diff
@@ -303,7 +303,7 @@ class GLMTransformer(nn.Module):
         return hidden_states
 
 
-class ChatGLMModel(nn.Module):
+class ChatGLMM(nn.Module):
     def __init__(
         self,
         config,
@@ -366,7 +366,7 @@ class ChatGLMForCausalLM(nn.Module):
         self.config: ChatGLMConfig = config
         self.quant_config = quant_config
         self.max_position_embeddings = getattr(config, "max_sequence_length", 8192)
-        self.transformer = ChatGLMModel(config, cache_config, quant_config)
+        self.transformer = ChatGLMM(config, cache_config, quant_config)
         self.lm_head = self.transformer.output_layer
         self.logits_processor = LogitsProcessor(config)
@@ -401,4 +401,4 @@ class ChatGLMModel(ChatGLMForCausalLM):
     pass
 
 
-EntryClass = [ChatGLMForCausalLM, ChatGLMModel]
+EntryClass = [ChatGLMModel]
```
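The rename frees the `ChatGLMModel` name for the alias class above, which matters because SGLang resolves a checkpoint by the architecture string in its `config.json`, and GLM-4 checkpoints (like earlier ChatGLM ones) declare `"ChatGLMModel"`. Roughly, the loader keys each `EntryClass` export by its class name; the sketch below illustrates that idea with stand-in classes and is not SGLang's actual loader code:

```python
from typing import Dict, List, Type

import torch.nn as nn


def build_registry(entry_classes: List[Type[nn.Module]]) -> Dict[str, Type[nn.Module]]:
    # Key each exported class by __name__; this string must match an entry
    # in the checkpoint's config.json "architectures" list.
    return {cls.__name__: cls for cls in entry_classes}


class ChatGLMForCausalLM(nn.Module):  # stand-in for the real model class
    pass


class ChatGLMModel(ChatGLMForCausalLM):  # alias matching the HF architecture name
    pass


# With EntryClass = [ChatGLMModel], a GLM-4 checkpoint declaring
# architectures=["ChatGLMModel"] resolves to this class.
registry = build_registry([ChatGLMModel])
print(registry["ChatGLMModel"])
```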
In the generation-model test suite, `glm-4-9b-chat` is added to the model cases:

```diff
@@ -57,6 +57,7 @@ ALL_OTHER_MODELS = [
     ModelCase("Qwen/Qwen2.5-14B-Instruct"),
     ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
     ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
+    ModelCase("THUDM/glm-4-9b-chat"),
 ]
 
 TORCH_DTYPES = [torch.float16]
```
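The new `ModelCase` enrolls `glm-4-9b-chat` in the parity suite, which checks SGLang's outputs against a HuggingFace reference within per-case tolerances (`decode_tolerance`, `skip_long_prompt` above). The sketch below shows the kind of comparison such a case drives; the helper name and the default tolerance are assumptions, not the suite's real internals:

```python
import torch


def check_decode_parity(
    srt_logprobs: torch.Tensor,
    hf_logprobs: torch.Tensor,
    decode_tolerance: float = 1e-2,  # assumed default; OLMo above relaxes it to 8e-2
) -> bool:
    # A case passes when SGLang's per-token decode log-probs stay within
    # the case's tolerance of the HuggingFace reference.
    return torch.allclose(srt_logprobs, hf_logprobs, atol=decode_tolerance)


# Illustrative call with dummy tensors standing in for real model outputs:
ref = torch.randn(16)
assert check_decode_parity(ref + 1e-3, ref)
```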