sglang / Commits / 56503d9b
"vscode:/vscode.git/clone" did not exist on "f2fcce5802458c091de1d2c63d9c8f77fae43c8c"
Unverified commit 56503d9b, authored Oct 14, 2024 by Byron Hsu; committed by GitHub on Oct 14, 2024
[1/N] Remove `CacheConfig` import in all model files (#1658)
parent 02bc9579

Changes: 30 files
Showing the first 20 changed files, with 48 additions and 65 deletions (+48, -65) across them; the remaining 10 files are on the second page of the diff.
python/sglang/srt/models/baichuan.py              +2  -3
python/sglang/srt/models/chatglm.py               +5  -6
python/sglang/srt/models/commandr.py              +1  -2
python/sglang/srt/models/dbrx.py                  +1  -2
python/sglang/srt/models/deepseek.py              +4  -5
python/sglang/srt/models/deepseek_v2.py           +5  -6
python/sglang/srt/models/exaone.py                +1  -2
python/sglang/srt/models/gemma.py                 +2  -2
python/sglang/srt/models/gemma2.py                +5  -5
python/sglang/srt/models/gpt_bigcode.py           +5  -5
python/sglang/srt/models/grok.py                  +1  -2
python/sglang/srt/models/internlm2.py             +1  -2
python/sglang/srt/models/llama.py                 +1  -2
python/sglang/srt/models/llama_classification.py  +1  -2
python/sglang/srt/models/llama_reward.py          +2  -3
python/sglang/srt/models/llava.py                 +3  -4
python/sglang/srt/models/llavavid.py              +1  -2
python/sglang/srt/models/minicpm.py               +1  -2
python/sglang/srt/models/minicpm3.py              +5  -6
python/sglang/srt/models/mixtral.py               +1  -2
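Every file on this page follows the same two-part edit: the `from vllm.config import CacheConfig` import is dropped (or trimmed to the names that remain, such as `LoRAConfig`), and the constructor parameter `cache_config: Optional[CacheConfig] = None` is loosened to a plain, untyped `cache_config=None`. A minimal sketch of the pattern, using a hypothetical `ExampleForCausalLM` that stands in for any of the touched model classes:

from typing import Optional

from torch import nn
from transformers import PretrainedConfig

from sglang.srt.layers.quantization.base_config import QuantizationConfig

# Before this commit (sketch):
#     from vllm.config import CacheConfig
#     ...
#     cache_config: Optional[CacheConfig] = None,
#
# After this commit, the vllm import disappears and cache_config becomes an
# untyped placeholder that the model simply ignores.


class ExampleForCausalLM(nn.Module):  # illustrative stand-in, not a file in this commit
    def __init__(
        self,
        config: PretrainedConfig,
        cache_config=None,  # still accepted, but no longer typed against vllm's CacheConfig
        quant_config: Optional[QuantizationConfig] = None,
    ) -> None:
        super().__init__()
        self.config = config
        self.quant_config = quant_config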
python/sglang/srt/models/baichuan.py

@@ -24,7 +24,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -330,7 +329,7 @@ class BaiChuanBaseForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         position_embedding: str,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -404,7 +403,7 @@ class BaichuanForCausalLM(BaiChuanBaseForCausalLM):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         if config.hidden_size == 4096:  # baichuan2 7b
python/sglang/srt/models/chatglm.py

@@ -22,7 +22,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from torch.nn import LayerNorm
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -52,7 +51,7 @@ class GLMAttention(nn.Module):
         self,
         config,
         layer_id: int = 0,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -188,7 +187,7 @@ class GLMBlock(nn.Module):
         self,
         config,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -260,7 +259,7 @@ class GLMTransformer(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -308,7 +307,7 @@ class ChatGLMModel(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -359,7 +358,7 @@ class ChatGLMForCausalLM(nn.Module):
     def __init__(
         self,
         config: ChatGLMConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoraConfig] = None,
     ):
python/sglang/srt/models/commandr.py

@@ -45,7 +45,6 @@ import torch.utils.checkpoint
 from torch import nn
 from torch.nn.parameter import Parameter
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -320,7 +319,7 @@ class CohereForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/dbrx.py

@@ -20,7 +20,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 import torch.nn as nn
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -368,7 +367,7 @@ class DbrxForCausalLM(nn.Module):
         self,
         config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ):
         super().__init__()
         self.config = config
python/sglang/srt/models/deepseek.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -185,7 +184,7 @@ class DeepseekAttention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -262,7 +261,7 @@ class DeepseekDecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -331,7 +330,7 @@ class DeepseekModel(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -374,7 +373,7 @@ class DeepseekForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
python/sglang/srt/models/deepseek_v2.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
@@ -188,7 +187,7 @@ class DeepseekV2Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -336,7 +335,7 @@ class DeepseekV2AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -498,7 +497,7 @@ class DeepseekV2DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -594,7 +593,7 @@ class DeepseekV2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -640,7 +639,7 @@ class DeepseekV2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
python/sglang/srt/models/exaone.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class ExaoneForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/gemma.py

@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
@@ -279,7 +279,7 @@ class GemmaForCausalLM(nn.Module):
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         del lora_config  # Unused.
         super().__init__()
python/sglang/srt/models/gemma2.py

@@ -20,7 +20,7 @@ from typing import Iterable, Optional, Set, Tuple, Union
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size

 # from vllm.model_executor.layers.rotary_embedding import GemmaRotaryEmbedding
@@ -105,7 +105,7 @@ class Gemma2Attention(nn.Module):
         head_dim: int,
         max_position_embeddings: int,
         rope_theta: float,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -190,7 +190,7 @@ class Gemma2DecoderLayer(nn.Module):
         self,
         layer_idx: int,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -257,7 +257,7 @@ class Gemma2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -336,7 +336,7 @@ class Gemma2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ) -> None:
python/sglang/srt/models/gpt_bigcode.py

@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GPTBigCodeConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -44,7 +44,7 @@ class GPTBigCodeAttention(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -145,7 +145,7 @@ class GPTBigCodeBlock(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -183,7 +183,7 @@ class GPTBigCodeModel(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
@@ -243,7 +243,7 @@ class GPTBigCodeForCausalLM(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
python/sglang/srt/models/grok.py

@@ -23,7 +23,6 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -289,7 +288,7 @@ class Grok1ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/internlm2.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -254,7 +253,7 @@ class InternLM2ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/llama.py

@@ -22,7 +22,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class LlamaForCausalLM(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/llama_classification.py

@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -32,7 +31,7 @@ class LlamaForClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/llama_reward.py

@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -33,7 +32,7 @@ class LlamaForSequenceClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
@@ -92,7 +91,7 @@ class LlamaForSequenceClassificationWithNormal_Weights(LlamaForSequenceClassific
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__(config, quant_config, cache_config)
         self.weights = self.Weights(config.hidden_size, self.num_labels)
python/sglang/srt/models/llava.py

@@ -31,7 +31,6 @@ from transformers import (
     SiglipVisionModel,
 )
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -450,7 +449,7 @@ class LlavaLlamaForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -472,7 +471,7 @@ class LlavaQwenForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -505,7 +504,7 @@ class LlavaMistralForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
python/sglang/srt/models/llavavid.py

@@ -22,7 +22,6 @@ import torch
 from torch import nn
 from transformers import CLIPVisionModel, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -36,7 +35,7 @@ class LlavaVidForCausalLM(nn.Module):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/minicpm.py

@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -278,7 +277,7 @@ class MiniCPMForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
python/sglang/srt/models/minicpm3.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -108,7 +107,7 @@ class MiniCPM3Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -252,7 +251,7 @@ class MiniCPM3AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -409,7 +408,7 @@ class MiniCPM3DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -501,7 +500,7 @@ class MiniCPM3Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -552,7 +551,7 @@ class MiniCPM3ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
python/sglang/srt/models/mixtral.py

@@ -21,7 +21,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import MixtralConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.rotary_embedding import get_rope
@@ -293,7 +292,7 @@ class MixtralForCausalLM(nn.Module):
         self,
         config: MixtralConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
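Note that `cache_config` itself is kept in every signature (untyped, defaulting to `None`) rather than removed outright, presumably so existing call sites that still pass it continue to work; only the type-level dependency on `vllm.config.CacheConfig` goes away. A small usage sketch, assuming the hypothetical `ExampleForCausalLM` from the sketch above:

from transformers import PretrainedConfig

# Hypothetical call sites: cache_config may still be passed, or omitted entirely.
cfg = PretrainedConfig(hidden_size=16, num_attention_heads=2, num_hidden_layers=1)
model_a = ExampleForCausalLM(config=cfg, cache_config=None)  # explicit None still accepted
model_b = ExampleForCausalLM(config=cfg)                     # or dropped altogether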